460 files changed, 25584 insertions, 11774 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 986efcfb9..70e1bba67 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,16 +1,27 @@
 <!--
 Please keep in mind yuzu is EXPERIMENTAL SOFTWARE.
 
-Please read the FAQ: https://yuzu-emu.org/wiki/faq/
+Please read the FAQ:
+https://yuzu-emu.org/wiki/faq/
 
-When submitting an issue, please do the following:
+THIS IS NOT A SUPPORT FORUM. FOR SUPPORT, GO TO:
+https://community.citra-emu.org/
 
-- Provide the version (commit hash) of yuzu you are using.
-- Provide sufficient detail for the issue to be reproduced.
-- Provide:
+If the FAQ does not answer your question, please go to:
+https://community.citra-emu.org/
+
+When submitting an issue, please check the following:
+
+- You have read the above.
+- You have provided the version (commit hash) of yuzu you are using.
+- You have provided sufficient detail for the issue to be reproduced.
+- You have provided system specs (if relevant).
+- Please also provide:
+  - For any issues, a log file.
   - For crashes, a backtrace.
   - For graphical issues, comparison screenshots with real hardware.
   - For emulation inaccuracies, a test-case (if able).
+
 -->
 
 
diff --git a/.gitmodules b/.gitmodules
index a33a04167..3a49c4874 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -37,3 +37,12 @@
 [submodule "discord-rpc"]
     path = externals/discord-rpc
     url = https://github.com/discordapp/discord-rpc.git
+[submodule "Vulkan-Headers"]
+    path = externals/Vulkan-Headers
+    url = https://github.com/KhronosGroup/Vulkan-Headers.git
+[submodule "externals/zstd"]
+    path = externals/zstd
+    url = https://github.com/facebook/zstd
+[submodule "sirit"]
+    path = externals/sirit
+    url = https://github.com/ReinUsesLisp/sirit
diff --git a/.travis.yml b/.travis.yml
index b0fbe3c5f..9512f7843 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
     - os: osx
       env: NAME="macos build"
       sudo: false
-      osx_image: xcode10
+      osx_image: xcode10.1
       install: "./.travis/macos/deps.sh"
       script: "./.travis/macos/build.sh"
       after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/common/travis-ci.env b/.travis/common/travis-ci.env
index ec8e2dd63..cffeb2e2b 100644
--- a/.travis/common/travis-ci.env
+++ b/.travis/common/travis-ci.env
@@ -6,6 +6,8 @@ TRAVIS_BRANCH
 TRAVIS_BUILD_ID
 TRAVIS_BUILD_NUMBER
 TRAVIS_COMMIT
+TRAVIS_COMMIT_RANGE
+TRAVIS_EVENT_TYPE
 TRAVIS_JOB_ID
 TRAVIS_JOB_NUMBER
 TRAVIS_REPO_SLUG
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index 4a14837fc..b7b4c6f8c 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@
 
 set -o pipefail
 
-export MACOSX_DEPLOYMENT_TARGET=10.13
+export MACOSX_DEPLOYMENT_TARGET=10.14
 export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
diff --git a/.travis/macos/deps.sh b/.travis/macos/deps.sh
index 1a547c060..faeafa216 100755
--- a/.travis/macos/deps.sh
+++ b/.travis/macos/deps.sh
@@ -1,5 +1,6 @@
 #!/bin/sh -ex
 
 brew update
-brew install dylibbundler p7zip qt5 sdl2 ccache
+brew install p7zip qt5 sdl2 ccache
 brew outdated cmake || brew upgrade cmake
+pip3 install macpack
diff --git a/.travis/macos/upload.sh b/.travis/macos/upload.sh
index 9ba95086b..66e3455ff 100755
--- a/.travis/macos/upload.sh
+++ b/.travis/macos/upload.sh
@@ -11,92 +11,19 @@ mkdir "$REV_NAME"
 cp build/bin/yuzu-cmd "$REV_NAME"
 cp -r build/bin/yuzu.app "$REV_NAME"
 
-# move qt libs into app bundle for deployment
-$(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app"
+# move libs into folder for deployment
+macpack "${REV_NAME}/yuzu.app/Contents/MacOS/yuzu" -d "../Frameworks"
+# move qt frameworks into app bundle for deployment
+$(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app" -executable="${REV_NAME}/yuzu.app/Contents/MacOS/yuzu"
 
-# move SDL2 libs into folder for deployment
-dylibbundler -b -x "${REV_NAME}/yuzu-cmd" -cd -d "${REV_NAME}/libs" -p "@executable_path/libs/"
-
-# Make the changes to make the yuzu app standalone (i.e. not dependent on the current brew installation).
-# To do this, the absolute references to each and every QT framework must be re-written to point to the local frameworks
-# (in the Contents/Frameworks folder).
-# The "install_name_tool" is used to do so.
-
-# Coreutils is a hack to coerce Homebrew to point to the absolute Cellar path (symlink dereferenced). i.e:
-# ls -l /usr/local/opt/qt5:: /usr/local/opt/qt5 -> ../Cellar/qt5/5.6.1-1
-# grealpath ../Cellar/qt5/5.6.1-1:: /usr/local/Cellar/qt5/5.6.1-1
-brew install coreutils || brew upgrade coreutils || true
-
-REV_NAME_ALT=$REV_NAME/
-# grealpath is located in coreutils, there is no "realpath" for OS X :(
-QT_BREWS_PATH=$(grealpath "$(brew --prefix qt5)")
-BREW_PATH=$(brew --prefix)
-QT_VERSION_NUM=5
-
-$BREW_PATH/opt/qt5/bin/macdeployqt "${REV_NAME_ALT}yuzu.app" \
-    -executable="${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu"
-
-# These are the files that macdeployqt packed into Contents/Frameworks/ - we don't want those, so we replace them.
-declare -a macos_libs=("QtCore" "QtWidgets" "QtGui" "QtOpenGL" "QtPrintSupport")
-
-for macos_lib in "${macos_libs[@]}"
-do
-    SC_FRAMEWORK_PART=$macos_lib.framework/Versions/$QT_VERSION_NUM/$macos_lib
-    # Replace macdeployqt versions of the Frameworks with our own (from /usr/local/opt/qt5/lib/)
-    cp "$BREW_PATH/opt/qt5/lib/$SC_FRAMEWORK_PART" "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
-
-    # Replace references within the embedded Framework files with "internal" versions.
-    for macos_lib2 in "${macos_libs[@]}"
-    do
-        # Since brew references both the non-symlinked and symlink paths of QT5, it needs to be duplicated.
-        # /usr/local/Cellar/qt5/5.6.1-1/lib and /usr/local/opt/qt5/lib both resolve to the same files.
-        # So the two lines below are effectively duplicates when resolved as a path, but as strings, they aren't.
-        RM_FRAMEWORK_PART=$macos_lib2.framework/Versions/$QT_VERSION_NUM/$macos_lib2
-        install_name_tool -change \
-            $QT_BREWS_PATH/lib/$RM_FRAMEWORK_PART \
-            @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
-            "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
-        install_name_tool -change \
-            "$BREW_PATH/opt/qt5/lib/$RM_FRAMEWORK_PART" \
-            @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
-            "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
-    done
-done
-
-# Handles `This application failed to start because it could not find or load the Qt platform plugin "cocoa"`
-# Which manifests itself as:
-# "Exception Type: EXC_CRASH (SIGABRT) | Exception Codes: 0x0000000000000000, 0x0000000000000000 | Exception Note: EXC_CORPSE_NOTIFY"
-# There may be more dylibs needed to be fixed...
-declare -a macos_plugins=("Plugins/platforms/libqcocoa.dylib")
-
-for macos_lib in "${macos_plugins[@]}"
-do
-    install_name_tool -id @executable_path/../$macos_lib "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
-    for macos_lib2 in "${macos_libs[@]}"
-    do
-        RM_FRAMEWORK_PART=$macos_lib2.framework/Versions/$QT_VERSION_NUM/$macos_lib2
-        install_name_tool -change \
-            $QT_BREWS_PATH/lib/$RM_FRAMEWORK_PART \
-            @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
-            "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
-        install_name_tool -change \
-            "$BREW_PATH/opt/qt5/lib/$RM_FRAMEWORK_PART" \
-            @executable_path/../Frameworks/$RM_FRAMEWORK_PART \
-            "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
-    done
-done
-
-for macos_lib in "${macos_libs[@]}"
-do
-    # Debugging info for Travis-CI
-    otool -L "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$macos_lib.framework/Versions/$QT_VERSION_NUM/$macos_lib"
-done
+# move libs into folder for deployment
+macpack "${REV_NAME}/yuzu-cmd" -d "libs"
 
 # Make the yuzu.app application launch a debugging terminal.
 # Store away the actual binary
-mv ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu-bin
+mv ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu-bin
 
-cat > ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu <<EOL
+cat > ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu <<EOL
 #!/usr/bin/env bash
 cd "\`dirname "\$0"\`"
 chmod +x yuzu-bin
@@ -105,6 +32,9 @@ EOL
 # Content that will serve as the launching script for yuzu (within the .app folder)
 
 # Make the launching script executable
-chmod +x ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu
+chmod +x ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu
+
+# Verify loader instructions
+find "$REV_NAME" -exec otool -L {} \;
 
 . .travis/common/post-upload.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 871e0ca1a..6a417017c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OF
 
 option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
 
+option(ENABLE_VULKAN "Enables Vulkan backend" ON)
+
 option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
 
 if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
@@ -102,90 +104,18 @@ endif()
 message(STATUS "Target architecture: ${ARCHITECTURE}")
 
 
-# Configure compilation flags
+# Configure C++ standard
 # ===========================
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
-if (NOT MSVC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
-
-    if (MINGW)
-        add_definitions(-DMINGW_HAS_SECURE_API)
-
-        if (MINGW_STATIC_BUILD)
-            add_definitions(-DQT_STATICPLUGIN)
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static")
-            set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
-        endif()
-    endif()
-else()
-    # Silence "deprecation" warnings
-    add_definitions(/D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_DEPRECATE /D_SCL_SECURE_NO_WARNINGS)
-    # Avoid windows.h junk
-    add_definitions(/DNOMINMAX)
-    # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
-    add_definitions(/DWIN32_LEAN_AND_MEAN)
-
-    set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
-
-    # Tweak optimization settings
-    # As far as I can tell, there's no way to override the CMake defaults while leaving user
-    # changes intact, so we'll just clobber everything and say sorry.
-    message(STATUS "Cache compiler flags ignored, please edit CMakeLists.txt to change the flags.")
-
-    # /W3 - Level 3 warnings
-    # /MP - Multi-threaded compilation
-    # /Zi - Output debugging information
-    # /Zo - enhanced debug info for optimized builds
-    # /permissive- - enables stricter C++ standards conformance checks
-    set(CMAKE_C_FLAGS   "/W3 /MP /Zi /Zo /permissive-" CACHE STRING "" FORCE)
-    # /EHsc - C++-only exception handling semantics
-    # /Zc:throwingNew - let codegen assume `operator new` will never return null
-    # /Zc:inline - let codegen omit inline functions in object files
-    set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /EHsc /std:c++latest /Zc:throwingNew,inline" CACHE STRING "" FORCE)
-
-    # /MDd - Multi-threaded Debug Runtime DLL
-    set(CMAKE_C_FLAGS_DEBUG   "/Od /MDd" CACHE STRING "" FORCE)
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" CACHE STRING "" FORCE)
-
-    # /O2 - Optimization level 2
-    # /GS- - No stack buffer overflow checks
-    # /MD - Multi-threaded runtime DLL
-    set(CMAKE_C_FLAGS_RELEASE   "/O2 /GS- /MD" CACHE STRING "" FORCE)
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}" CACHE STRING "" FORCE)
-
-    set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
-    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
-endif()
-
-# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
-# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
-if (CMAKE_COMPILER_IS_GNUCC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
-endif()
-
-# Set file offset size to 64 bits.
-#
-# On modern Unixes, this is typically already the case. The lone exception is
-# glibc, which may default to 32 bits. glibc allows this to be configured
-# by setting _FILE_OFFSET_BITS.
-if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
-    add_definitions(-D_FILE_OFFSET_BITS=64)
-endif()
-
-# CMake seems to only define _DEBUG on Windows
-set_property(DIRECTORY APPEND PROPERTY
-    COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
-
 # System imported libraries
 # ======================
 
-find_package(Boost 1.63.0 QUIET)
+find_package(Boost 1.66.0 QUIET)
 if (NOT Boost_FOUND)
-    message(STATUS "Boost 1.63.0 or newer not found, falling back to externals")
+    message(STATUS "Boost 1.66.0 or newer not found, falling back to externals")
 
     set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
     set(Boost_NO_SYSTEM_PATHS OFF)
@@ -330,25 +260,21 @@ endif()
 # Platform-specific library requirements
 # ======================================
 
-IF (APPLE)
-    find_library(COCOA_LIBRARY Cocoa)           # Umbrella framework for everything GUI-related
+if (APPLE)
+    # Umbrella framework for everything GUI-related
+    find_library(COCOA_LIBRARY Cocoa)
     set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
-
-    if (CMAKE_CXX_COMPILER_ID STREQUAL Clang)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
-    endif()
-ELSEIF (WIN32)
+elseif (WIN32)
     # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
     add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
     set(PLATFORM_LIBRARIES winmm ws2_32)
-    IF (MINGW)
+    if (MINGW)
         # PSAPI is the Process Status API
         set(PLATFORM_LIBRARIES ${PLATFORM_LIBRARIES} psapi imm32 version)
-    ENDIF (MINGW)
-ELSEIF (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$")
+    endif()
+elseif (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$")
     set(PLATFORM_LIBRARIES rt)
-ENDIF (APPLE)
+endif()
 
 # Setup a custom clang-format target (if clang-format can be found) that will run
 # against all the src files. This should be used before making a pull request.
@@ -383,7 +309,7 @@ if (CLANG_FORMAT)
     set(CCOMMENT "Running clang format against all the .h and .cpp files in src/")
     if (WIN32)
         add_custom_target(clang-format
-            COMMAND powershell.exe -Command "Get-ChildItem ${SRCS}/* -Include *.cpp,*.h -Recurse | Foreach {${CLANG_FORMAT} -i $_.fullname}"
+            COMMAND powershell.exe -Command "Get-ChildItem '${SRCS}/*' -Include *.cpp,*.h -Recurse | Foreach {&'${CLANG_FORMAT}' -i $_.fullname}"
             COMMENT ${CCOMMENT})
     elseif(MINGW)
         add_custom_target(clang-format
@@ -419,19 +345,6 @@ function(create_target_directory_groups target_name)
     endforeach()
 endfunction()
 
-# Gets a UTC timstamp and sets the provided variable to it
-function(get_timestamp _var)
-    string(TIMESTAMP timestamp UTC)
-    set(${_var} "${timestamp}" PARENT_SCOPE)
-endfunction()
-
-# generate git/build information
-include(GetGitRevisionDescription)
-get_git_head_revision(GIT_REF_SPEC GIT_REV)
-git_describe(GIT_DESC --always --long --dirty)
-git_branch_name(GIT_BRANCH)
-get_timestamp(BUILD_DATE)
-
 enable_testing()
 add_subdirectory(externals)
 add_subdirectory(src)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
new file mode 100644
index 000000000..08315a1f1
--- /dev/null
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -0,0 +1,95 @@
+# Gets a UTC timestamp and sets the provided variable to it
+function(get_timestamp _var)
+    string(TIMESTAMP timestamp UTC)
+    set(${_var} "${timestamp}" PARENT_SCOPE)
+endfunction()
+
+list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
+# generate git/build information
+include(GetGitRevisionDescription)
+get_git_head_revision(GIT_REF_SPEC GIT_REV)
+git_describe(GIT_DESC --always --long --dirty)
+git_branch_name(GIT_BRANCH)
+get_timestamp(BUILD_DATE)
+
+# Generate cpp with Git revision from template
+# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
+set(REPO_NAME "")
+set(BUILD_VERSION "0")
+if (BUILD_REPOSITORY)
+  # regex capture the string nightly or canary into CMAKE_MATCH_1
+  string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
+  if (${CMAKE_MATCH_COUNT} GREATER 0)
+    # capitalize the first letter of each word in the repo name.
+    string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
+    foreach(WORD ${REPO_NAME_LIST})
+      string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
+      string(SUBSTRING ${WORD} 1 -1 REMAINDER)
+      string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
+      set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
+    endforeach()
+    if (BUILD_TAG)
+      string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
+      if (${CMAKE_MATCH_COUNT} GREATER 0)
+        set(BUILD_VERSION ${CMAKE_MATCH_1})
+      endif()
+      if (BUILD_VERSION)
+        # This leaves a trailing space on the last word, but we actually want that
+        # because of how it's styled in the title bar.
+        set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
+      else()
+        set(BUILD_FULLNAME "")
+      endif()
+    endif()
+  endif()
+endif()
+
+# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
+set(VIDEO_CORE "${SRC_DIR}/src/video_core")
+set(HASH_FILES
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
+    "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
+    "${VIDEO_CORE}/shader/decode/bfe.cpp"
+    "${VIDEO_CORE}/shader/decode/bfi.cpp"
+    "${VIDEO_CORE}/shader/decode/conversion.cpp"
+    "${VIDEO_CORE}/shader/decode/ffma.cpp"
+    "${VIDEO_CORE}/shader/decode/float_set.cpp"
+    "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/half_set.cpp"
+    "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/hfma2.cpp"
+    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
+    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
+    "${VIDEO_CORE}/shader/decode/other.cpp"
+    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
+    "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/shift.cpp"
+    "${VIDEO_CORE}/shader/decode/video.cpp"
+    "${VIDEO_CORE}/shader/decode/xmad.cpp"
+    "${VIDEO_CORE}/shader/decode.cpp"
+    "${VIDEO_CORE}/shader/shader_ir.cpp"
+    "${VIDEO_CORE}/shader/shader_ir.h"
+    "${VIDEO_CORE}/shader/track.cpp"
+)
+set(COMBINED "")
+foreach (F IN LISTS HASH_FILES)
+    file(READ ${F} TMP)
+    set(COMBINED "${COMBINED}${TMP}")
+endforeach()
+string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
+configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
diff --git a/README.md b/README.md
index 1d5ee58cc..fa4233b2a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
 
 It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
 
-yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
+yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
 
 yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.
 
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index e156bbece..3f8b6cda8 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -49,6 +49,10 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
 add_library(unicorn-headers INTERFACE)
 target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
 
+# Zstandard
+add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL)
+target_include_directories(libzstd_static INTERFACE ./zstd/lib)
+
 # SoundTouch
 add_subdirectory(soundtouch)
 
@@ -68,6 +72,11 @@ if (USE_DISCORD_PRESENCE)
     target_include_directories(discord-rpc INTERFACE ./discord-rpc/include)
 endif()
 
+# Sirit
+if (ENABLE_VULKAN)
+    add_subdirectory(sirit)
+endif()
+
 if (ENABLE_WEB_SERVICE)
     # LibreSSL
     set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
new file mode 160000
+Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202
diff --git a/externals/cubeb b/externals/cubeb
-Subproject 12b78c0edfa40007e41dbdcd9dfe367fbb98d01
+Subproject 6f2420de8f155b10330cf973900ac7bdbfee589
diff --git a/externals/opus b/externals/opus
-Subproject b2871922a12abb49579512d604cabc471a59ad9
+Subproject 562f8ba555c4181e1b57e82e496e4a959b9c019
diff --git a/externals/sirit b/externals/sirit
new file mode 160000
+Subproject f7c4b07a7e14edb1dcd93bc9879c823423705c2
diff --git a/externals/zstd b/externals/zstd
new file mode 160000
+Subproject 470344d33e1d52a2ada75d278466da8d4ee2faf
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f69d00a2b..6c99dd5e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,18 +1,79 @@
 # Enable modules to include each other's files
 include_directories(.)
 
+# CMake seems to only define _DEBUG on Windows
+set_property(DIRECTORY APPEND PROPERTY
+    COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
+
+# Set compilation flags
+if (MSVC)
+    set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
+
+    # Silence "deprecation" warnings
+    add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
+
+    # Avoid windows.h junk
+    add_definitions(-DNOMINMAX)
+
+    # Prevent windows.h from including some usually unused headers like winsock.h, since this might cause some redefinition errors.
+    add_definitions(-DWIN32_LEAN_AND_MEAN)
+
+    # /W3 - Level 3 warnings
+    # /MP - Multi-threaded compilation
+    # /Zi - Output debugging information
+    # /Zo - enhanced debug info for optimized builds
+    # /permissive- - enables stricter C++ standards conformance checks
+    # /EHsc - C++-only exception handling semantics
+    # /Zc:throwingNew - let codegen assume `operator new` will never return null
+    # /Zc:inline - let codegen omit inline functions in object files
+    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+
+    # /GS- - No stack buffer overflow checks
+    add_compile_options("$<$<CONFIG:Release>:/GS->")
+
+    set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
+    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
+else()
+    add_compile_options("-Wno-attributes")
+
+    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+        add_compile_options("-stdlib=libc++")
+    endif()
+
+    # Set file offset size to 64 bits.
+    #
+    # On modern Unixes, this is typically already the case. The lone exception is
+    # glibc, which may default to 32 bits. glibc allows this to be configured
+    # by setting _FILE_OFFSET_BITS.
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
+        add_definitions(-D_FILE_OFFSET_BITS=64)
+    endif()
+
+    if (MINGW)
+        add_definitions(-DMINGW_HAS_SECURE_API)
+
+        if (MINGW_STATIC_BUILD)
+            add_definitions(-DQT_STATICPLUGIN)
+            add_compile_options("-static")
+        endif()
+    endif()
+endif()
+
 add_subdirectory(common)
 add_subdirectory(core)
 add_subdirectory(audio_core)
 add_subdirectory(video_core)
 add_subdirectory(input_common)
 add_subdirectory(tests)
+
 if (ENABLE_SDL2)
     add_subdirectory(yuzu_cmd)
 endif()
+
 if (ENABLE_QT)
     add_subdirectory(yuzu)
 endif()
+
 if (ENABLE_WEB_SERVICE)
     add_subdirectory(web_service)
 endif()
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 50d2a1ed3..8619a3f03 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
     return {};
 }
 
-StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
+StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
+                               u32 num_channels, std::string&& name,
                                Stream::ReleaseCallback&& release_callback) {
     if (!sink) {
         sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
     }
 
     return std::make_shared<Stream>(
-        sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
+        core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
         sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
 }
 
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index df9607ac7..b07588287 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -13,6 +13,10 @@
 #include "audio_core/stream.h"
 #include "common/common_types.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace AudioCore {
 
 /**
@@ -21,8 +25,8 @@ namespace AudioCore {
 class AudioOut {
 public:
     /// Opens a new audio stream
-    StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
-                         Stream::ReleaseCallback&& release_callback);
+    StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
+                         std::string&& name, Stream::ReleaseCallback&& release_callback);
 
     /// Returns a vector of recently released buffers specified by tag for the specified stream
     std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 00c026511..9a0939883 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -8,6 +8,7 @@
 #include "audio_core/codec.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 
@@ -71,14 +72,14 @@ private:
     EffectOutStatus out_status{};
     EffectInStatus info{};
 };
-AudioRenderer::AudioRenderer(AudioRendererParameter params,
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
                              Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
       effects(params.effect_count) {
 
     audio_out = std::make_unique<AudioCore::AudioOut>();
-    stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer",
-                                   [=]() { buffer_event->Signal(); });
+    stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
+                                   "AudioRenderer", [=]() { buffer_event->Signal(); });
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 7826881bf..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -14,6 +14,10 @@
 #include "common/swap.h"
 #include "core/hle/kernel/object.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Kernel {
 class WritableEvent;
 }
@@ -42,16 +46,18 @@ struct AudioRendererParameter {
     u32_le sample_rate;
     u32_le sample_count;
     u32_le mix_buffer_count;
-    u32_le unknown_c;
+    u32_le submix_count;
     u32_le voice_count;
     u32_le sink_count;
     u32_le effect_count;
-    u32_le unknown_1c;
-    u8 unknown_20;
-    INSERT_PADDING_BYTES(3);
+    u32_le performance_frame_count;
+    u8 is_voice_drop_enabled;
+    u8 unknown_21;
+    u8 unknown_22;
+    u8 execution_mode;
     u32_le splitter_count;
-    u32_le unknown_2c;
-    INSERT_PADDING_WORDS(1);
+    u32_le num_splitter_send_channels;
+    u32_le unknown_30;
     u32_le revision;
 };
 static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
@@ -208,7 +214,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
 
 class AudioRenderer {
 public:
-    AudioRenderer(AudioRendererParameter params,
+    AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
                   Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
     ~AudioRenderer();
 
diff --git a/src/audio_core/buffer.h b/src/audio_core/buffer.h
index a323b23ec..5ee09e9aa 100644
--- a/src/audio_core/buffer.h
+++ b/src/audio_core/buffer.h
@@ -21,7 +21,7 @@ public:
     Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
 
     /// Returns the raw audio data for the buffer
-    std::vector<s16>& Samples() {
+    std::vector<s16>& GetSamples() {
         return samples;
     }
 
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index 454de798b..c5a0d98ce 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
         }
     }
 
-    state.yn1 = yn1;
-    state.yn2 = yn2;
+    state.yn1 = static_cast<s16>(yn1);
+    state.yn2 = static_cast<s16>(yn2);
 
     return ret;
 }
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 097328901..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"
 
+#ifdef _WIN32
+#include <objbase.h>
+#endif
+
 namespace AudioCore {
 
 class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
         }
     }
 
-    ~CubebSinkStream() {
+    ~CubebSinkStream() override {
         if (!ctx) {
             return;
         }
@@ -75,11 +79,11 @@ public:
         queue.Push(samples);
     }
 
-    std::size_t SamplesInQueue(u32 num_channels) const override {
+    std::size_t SamplesInQueue(u32 channel_count) const override {
         if (!ctx)
             return 0;
 
-        return queue.Size() / num_channels;
+        return queue.Size() / channel_count;
     }
 
     void Flush() override {
@@ -98,7 +102,7 @@ private:
     u32 num_channels{};
 
     Common::RingBuffer<s16, 0x10000> queue;
-    std::array<s16, 2> last_frame;
+    std::array<s16, 2> last_frame{};
     std::atomic<bool> should_flush{};
     TimeStretcher time_stretch;
 
@@ -108,6 +112,11 @@ private:
 };
 
 CubebSink::CubebSink(std::string_view target_device_name) {
+    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
+#ifdef _WIN32
+    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+#endif
+
     if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
         LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
         return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
     }
 
     cubeb_destroy(ctx);
+
+#ifdef _WIN32
+    if (SUCCEEDED(com_init_result)) {
+        CoUninitialize();
+    }
+#endif
 }
 
 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
     cubeb* ctx{};
     cubeb_devid output_device{};
     std::vector<SinkStreamPtr> sink_streams;
+
+#ifdef _WIN32
+    u32 com_init_result = 0;
+#endif
 };
 
 std::vector<std::string> ListCubebSinkDevices();
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 874673c4e..22a3f8c84 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -32,13 +32,13 @@ u32 Stream::GetNumChannels() const {
     return {};
 }
 
-Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
-               SinkStream& sink_stream, std::string&& name_)
+Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
+               ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
     : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
-      sink_stream{sink_stream}, name{std::move(name_)} {
+      sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
 
-    release_event = CoreTiming::RegisterEvent(
-        name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
+    release_event = core_timing.RegisterEvent(
+        name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
 }
 
 void Stream::Play() {
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
 
 s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
     const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
-    return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
+    return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
 }
 
 static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -68,7 +68,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
     }
 
     // Implementation of a volume slider with a dynamic range of 60 dB
-    const float volume_scale_factor{std::exp(6.90775f * volume) * 0.001f};
+    const float volume_scale_factor = volume == 0 ? 0 : std::exp(6.90775f * volume) * 0.001f;
     for (auto& sample : samples) {
         sample = static_cast<s16>(sample * volume_scale_factor);
     }
@@ -95,11 +95,11 @@ void Stream::PlayNextBuffer() {
     active_buffer = queued_buffers.front();
     queued_buffers.pop();
 
-    VolumeAdjustSamples(active_buffer->Samples());
+    VolumeAdjustSamples(active_buffer->GetSamples());
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
+    core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
 }
 
 void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index aebfeb51d..05071243b 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -13,9 +13,10 @@
 #include "audio_core/buffer.h"
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace AudioCore {
 
@@ -42,8 +43,8 @@ public:
     /// Callback function type, used to change guest state on a buffer being released
     using ReleaseCallback = std::function<void()>;
 
-    Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
-           SinkStream& sink_stream, std::string&& name_);
+    Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
+           ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
 
     /// Plays the audio stream
     void Play();
@@ -91,16 +92,17 @@ private:
     /// Gets the number of core cycles when the specified buffer will be released
     s64 GetBufferReleaseCycles(const Buffer& buffer) const;
 
-    u32 sample_rate;                        ///< Sample rate of the stream
-    Format format;                          ///< Format of the stream
-    ReleaseCallback release_callback;       ///< Buffer release callback for the stream
-    State state{State::Stopped};            ///< Playback state of the stream
-    CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream
-    BufferPtr active_buffer;                ///< Actively playing buffer in the stream
-    std::queue<BufferPtr> queued_buffers;   ///< Buffers queued to be played in the stream
-    std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
-    SinkStream& sink_stream;                ///< Output sink for the stream
-    std::string name;                       ///< Name of the stream, must be unique
+    u32 sample_rate;                          ///< Sample rate of the stream
+    Format format;                            ///< Format of the stream
+    ReleaseCallback release_callback;         ///< Buffer release callback for the stream
+    State state{State::Stopped};              ///< Playback state of the stream
+    Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
+    BufferPtr active_buffer;                  ///< Actively playing buffer in the stream
+    std::queue<BufferPtr> queued_buffers;     ///< Buffers queued to be played in the stream
+    std::queue<BufferPtr> released_buffers;   ///< Buffers recently released from the stream
+    SinkStream& sink_stream;                  ///< Output sink for the stream
+    Core::Timing::CoreTiming& core_timing;    ///< Core timing instance.
+    std::string name;                         ///< Name of the stream, must be unique
 };
 
 using StreamPtr = std::shared_ptr<Stream>;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 845626fc5..1e8e1b215 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,42 +1,70 @@
-# Generate cpp with Git revision from template
-# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
-set(REPO_NAME "")
-set(BUILD_VERSION "0")
-if ($ENV{CI})
-  if ($ENV{TRAVIS})
+# Add a custom command to generate a new shader_cache_version hash when any of the following files change
+# NOTE: This is an approximation of what files affect shader generation, it's possible something else
+# could affect the result, but much more unlikely than the following files. Keeping a list of files
+# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
+set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
+if (DEFINED ENV{CI})
+  if (DEFINED ENV{TRAVIS})
     set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
     set(BUILD_TAG $ENV{TRAVIS_TAG})
-  elseif($ENV{APPVEYOR})
+  elseif(DEFINED ENV{APPVEYOR})
     set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
     set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME})
   endif()
-  # regex capture the string nightly or canary into CMAKE_MATCH_1
-  string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
-  if (${CMAKE_MATCH_COUNT} GREATER 0)
-    # capitalize the first letter of each word in the repo name.
-    string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
-    foreach(WORD ${REPO_NAME_LIST})
-      string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
-      string(SUBSTRING ${WORD} 1 -1 REMAINDER)
-      string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
-      set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
-    endforeach()
-    if (BUILD_TAG)
-      string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
-      if (${CMAKE_MATCH_COUNT} GREATER 0)
-        set(BUILD_VERSION ${CMAKE_MATCH_1})
-      endif()
-      if (BUILD_VERSION)
-        # This leaves a trailing space on the last word, but we actually want that
-        # because of how it's styled in the title bar.
-        set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
-      else()
-        set(BUILD_FULLNAME "")
-      endif()
-    endif()
-  endif()
 endif()
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY)
+add_custom_command(OUTPUT scm_rev.cpp
+    COMMAND ${CMAKE_COMMAND}
+      -DSRC_DIR="${CMAKE_SOURCE_DIR}"
+      -DBUILD_REPOSITORY="${BUILD_REPOSITORY}"
+      -DBUILD_TAG="${BUILD_TAG}"
+      -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
+    DEPENDS
+      # WARNING! It was too much work to try and make a common location for this list,
+      # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
+      "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
+      "${VIDEO_CORE}/shader/decode/bfe.cpp"
+      "${VIDEO_CORE}/shader/decode/bfi.cpp"
+      "${VIDEO_CORE}/shader/decode/conversion.cpp"
+      "${VIDEO_CORE}/shader/decode/ffma.cpp"
+      "${VIDEO_CORE}/shader/decode/float_set.cpp"
+      "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/half_set.cpp"
+      "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/hfma2.cpp"
+      "${VIDEO_CORE}/shader/decode/integer_set.cpp"
+      "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/memory.cpp"
+      "${VIDEO_CORE}/shader/decode/texture.cpp"
+      "${VIDEO_CORE}/shader/decode/other.cpp"
+      "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
+      "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/shift.cpp"
+      "${VIDEO_CORE}/shader/decode/video.cpp"
+      "${VIDEO_CORE}/shader/decode/xmad.cpp"
+      "${VIDEO_CORE}/shader/decode.cpp"
+      "${VIDEO_CORE}/shader/shader_ir.cpp"
+      "${VIDEO_CORE}/shader/shader_ir.h"
+      "${VIDEO_CORE}/shader/track.cpp"
+      # and also check that the scm_rev files haven't changed
+      "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
+      "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
+      # technically we should regenerate if the git version changed, but it's not worth the effort imo
+      "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
+)
 
 add_library(common STATIC
     alignment.h
@@ -63,11 +91,18 @@ add_library(common STATIC
     logging/log.h
     logging/text_formatter.cpp
     logging/text_formatter.h
+    lz4_compression.cpp
+    lz4_compression.h
     math_util.h
+    memory_hook.cpp
+    memory_hook.h
     microprofile.cpp
     microprofile.h
     microprofileui.h
     misc.cpp
+    multi_level_queue.h
+    page_table.cpp
+    page_table.h
     param_package.cpp
     param_package.h
     quaternion.h
@@ -86,8 +121,12 @@ add_library(common STATIC
     threadsafe_queue.h
     timer.cpp
     timer.h
+    uint128.cpp
+    uint128.h
     vector_math.h
     web_result.h
+    zstd_compression.cpp
+    zstd_compression.h
 )
 
 if(ARCHITECTURE_x86_64)
@@ -101,3 +140,4 @@ endif()
 create_target_directory_groups(common)
 
 target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
+target_link_libraries(common PRIVATE lz4_static libzstd_static)
diff --git a/src/common/assert.h b/src/common/assert.h
index 6002f7ab1..4b0e3f64e 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -57,3 +57,21 @@ __declspec(noinline, noreturn)
 
 #define UNIMPLEMENTED_IF(cond) ASSERT_MSG(!(cond), "Unimplemented code!")
 #define UNIMPLEMENTED_IF_MSG(cond, ...) ASSERT_MSG(!(cond), __VA_ARGS__)
+
+// If the assert is ignored, execute _b_
+#define ASSERT_OR_EXECUTE(_a_, _b_)                                                                \
+    do {                                                                                           \
+        ASSERT(_a_);                                                                               \
+        if (!(_a_)) {                                                                              \
+            _b_                                                                                    \
+        }                                                                                          \
+    } while (0)
+
+// If the assert is ignored, execute _b_
+#define ASSERT_OR_EXECUTE_MSG(_a_, _b_, ...)                                                       \
+    do {                                                                                           \
+        ASSERT_MSG(_a_, __VA_ARGS__);                                                              \
+        if (!(_a_)) {                                                                              \
+            _b_                                                                                    \
+        }                                                                                          \
+    } while (0)
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..902e668e3 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -34,6 +34,7 @@
 #include <limits>
 #include <type_traits>
 #include "common/common_funcs.h"
+#include "common/swap.h"
 
 /*
  * Abstract bitfield class
@@ -108,15 +109,9 @@
  * symptoms.
  */
 #pragma pack(1)
-template <std::size_t Position, std::size_t Bits, typename T>
+template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
 struct BitField {
 private:
-    // We hide the copy assigment operator here, because the default copy
-    // assignment would copy the full storage value, rather than just the bits
-    // relevant to this particular bit field.
-    // We don't delete it because we want BitField to be trivially copyable.
-    constexpr BitField& operator=(const BitField&) = default;
-
     // UnderlyingType is T for non-enum types and the underlying type of T if
     // T is an enumeration. Note that T is wrapped within an enable_if in the
     // former case to workaround compile errors which arise when using
@@ -127,6 +122,8 @@ private:
     // We store the value as the unsigned type to avoid undefined behaviour on value shifting
     using StorageType = std::make_unsigned_t<UnderlyingType>;
 
+    using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
+
 public:
     /// Constants to allow limited introspection of fields if needed
     static constexpr std::size_t position = Position;
@@ -163,16 +160,20 @@ public:
     BitField(T val) = delete;
     BitField& operator=(T val) = delete;
 
-    // Force default constructor to be created
-    // so that we can use this within unions
-    constexpr BitField() = default;
+    constexpr BitField() noexcept = default;
+
+    constexpr BitField(const BitField&) noexcept = default;
+    constexpr BitField& operator=(const BitField&) noexcept = default;
+
+    constexpr BitField(BitField&&) noexcept = default;
+    constexpr BitField& operator=(BitField&&) noexcept = default;
 
     constexpr FORCE_INLINE operator T() const {
         return Value();
     }
 
     constexpr FORCE_INLINE void Assign(const T& value) {
-        storage = (storage & ~mask) | FormatValue(value);
+        storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
     }
 
     constexpr T Value() const {
@@ -184,7 +185,7 @@ public:
     }
 
 private:
-    StorageType storage;
+    StorageTypeWithEndian storage;
 
     static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
 
@@ -195,3 +196,6 @@ private:
     static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
 };
 #pragma pack()
+
+template <std::size_t Position, std::size_t Bits, typename T>
+using BitFieldBE = BitField<Position, Bits, T, BETag>;
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 1eea17ba1..d032df413 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -32,7 +32,7 @@ inline u32 CountLeadingZeroes32(u32 value) {
     return 32;
 }
 
-inline u64 CountLeadingZeroes64(u64 value) {
+inline u32 CountLeadingZeroes64(u64 value) {
     unsigned long leading_zero = 0;
 
     if (_BitScanReverse64(&leading_zero, value) != 0) {
@@ -47,15 +47,54 @@ inline u32 CountLeadingZeroes32(u32 value) {
         return 32;
     }
 
-    return __builtin_clz(value);
+    return static_cast<u32>(__builtin_clz(value));
 }
 
-inline u64 CountLeadingZeroes64(u64 value) {
+inline u32 CountLeadingZeroes64(u64 value) {
     if (value == 0) {
         return 64;
     }
 
-    return __builtin_clzll(value);
+    return static_cast<u32>(__builtin_clzll(value));
 }
 #endif
+
+#ifdef _MSC_VER
+inline u32 CountTrailingZeroes32(u32 value) {
+    unsigned long trailing_zero = 0;
+
+    if (_BitScanForward(&trailing_zero, value) != 0) {
+        return trailing_zero;
+    }
+
+    return 32;
+}
+
+inline u32 CountTrailingZeroes64(u64 value) {
+    unsigned long trailing_zero = 0;
+
+    if (_BitScanForward64(&trailing_zero, value) != 0) {
+        return trailing_zero;
+    }
+
+    return 64;
+}
+#else
+inline u32 CountTrailingZeroes32(u32 value) {
+    if (value == 0) {
+        return 32;
+    }
+
+    return static_cast<u32>(__builtin_ctz(value));
+}
+
+inline u32 CountTrailingZeroes64(u64 value) {
+    if (value == 0) {
+        return 64;
+    }
+
+    return static_cast<u32>(__builtin_ctzll(value));
+}
+#endif
+
 } // namespace Common
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
 /**
  * Decode a color stored in RGBA8 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
  */
-inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
     return {bytes[3], bytes[2], bytes[1], bytes[0]};
 }
 
 /**
  * Decode a color stored in RGB8 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
  */
-inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
     return {bytes[2], bytes[1], bytes[0], 255};
 }
 
 /**
  * Decode a color stored in RG8 (aka HILO8) format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
  */
-inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
     return {bytes[1], bytes[0], 0, 255};
 }
 
 /**
  * Decode a color stored in RGB565 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
  */
-inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
     u16_le pixel;
     std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
 /**
  * Decode a color stored in RGB5A1 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
  */
-inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
     u16_le pixel;
     std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
 /**
  * Decode a color stored in RGBA4 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
  */
-inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
     u16_le pixel;
     std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
 /**
  * Decode a depth value and a stencil value stored in D24S8 format
  * @param bytes Pointer to encoded source values
- * @return Resulting values stored as a Math::Vec2
+ * @return Resulting values stored as a Common::Vec2
  */
-inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
     return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
 }
 
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
  * @param color Source color to encode
  * @param bytes Destination pointer to store encoded color
  */
-inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
     bytes[3] = color.r();
     bytes[2] = color.g();
     bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
  * @param color Source color to encode
  * @param bytes Destination pointer to store encoded color
  */
-inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
     bytes[2] = color.r();
     bytes[1] = color.g();
     bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
  * @param color Source color to encode
  * @param bytes Destination pointer to store encoded color
  */
-inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
     bytes[1] = color.r();
     bytes[0] = color.g();
 }
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
  * @param color Source color to encode
  * @param bytes Destination pointer to store encoded color
  */
-inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
     const u16_le data =
         (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
 
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
  * @param color Source color to encode
  * @param bytes Destination pointer to store encoded color
  */
-inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
     const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
                         (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
 
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
  * @param color Source color to encode
  * @param bytes Destination pointer to store encoded color
  */
-inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
     const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
                      (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
 
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index 4f88de768..076752d3b 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -35,6 +35,7 @@
 #define KEYS_DIR "keys"
 #define LOAD_DIR "load"
 #define DUMP_DIR "dump"
+#define SHADER_DIR "shader"
 #define LOG_DIR "log"
 
 // Filenames
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 6b1766dca..4cec89fbd 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
 using f32 = float;  ///< 32-bit floating point
 using f64 = double; ///< 64-bit floating point
 
-// TODO: It would be nice to eventually replace these with strong types that prevent accidental
-// conversion between each other.
-using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
-using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
+using VAddr = u64;    ///< Represents a pointer in the userspace virtual address space.
+using PAddr = u64;    ///< Represents a pointer in the ARM11 physical address space.
+using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
 
 using u128 = std::array<std::uint64_t, 2>;
 static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp
index a347d9e02..f268d6021 100644
--- a/src/common/detached_tasks.cpp
+++ b/src/common/detached_tasks.cpp
@@ -16,22 +16,22 @@ DetachedTasks::DetachedTasks() {
 }
 
 void DetachedTasks::WaitForAllTasks() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     cv.wait(lock, [this]() { return count == 0; });
 }
 
 DetachedTasks::~DetachedTasks() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     ASSERT(count == 0);
     instance = nullptr;
 }
 
 void DetachedTasks::AddTask(std::function<void()> task) {
-    std::unique_lock<std::mutex> lock(instance->mutex);
+    std::unique_lock lock{instance->mutex};
     ++instance->count;
     std::thread([task{std::move(task)}]() {
         task();
-        std::unique_lock<std::mutex> lock(instance->mutex);
+        std::unique_lock lock{instance->mutex};
         --instance->count;
         std::notify_all_at_thread_exit(instance->cv, std::move(lock));
     })
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index b52492da6..aecb66c32 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -710,6 +710,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
         paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
         paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
         paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
+        paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
         paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
         paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
         // TODO: Put the logs in a better location for each OS
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 571503d2a..38cc7f059 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -31,6 +31,7 @@ enum class UserPath {
     SDMCDir,
     LoadDir,
     DumpDir,
+    ShaderDir,
     SysDataDir,
     UserDir,
 };
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 12f6d0114..a03179520 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,19 +39,19 @@ public:
     Impl(Impl const&) = delete;
     const Impl& operator=(Impl const&) = delete;
 
-    void PushEntry(Entry e) {
-        std::lock_guard<std::mutex> lock(message_mutex);
-        message_queue.Push(std::move(e));
-        message_cv.notify_one();
+    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
+                   const char* function, std::string message) {
+        message_queue.Push(
+            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
     }
 
     void AddBackend(std::unique_ptr<Backend> backend) {
-        std::lock_guard<std::mutex> lock(writing_mutex);
+        std::lock_guard lock{writing_mutex};
         backends.push_back(std::move(backend));
     }
 
     void RemoveBackend(std::string_view backend_name) {
-        std::lock_guard<std::mutex> lock(writing_mutex);
+        std::lock_guard lock{writing_mutex};
         const auto it =
             std::remove_if(backends.begin(), backends.end(),
                            [&backend_name](const auto& i) { return backend_name == i->GetName(); });
@@ -80,21 +80,19 @@ private:
         backend_thread = std::thread([&] {
             Entry entry;
             auto write_logs = [&](Entry& e) {
-                std::lock_guard<std::mutex> lock(writing_mutex);
+                std::lock_guard lock{writing_mutex};
                 for (const auto& backend : backends) {
                     backend->Write(e);
                 }
             };
             while (true) {
-                {
-                    std::unique_lock<std::mutex> lock(message_mutex);
-                    message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
-                }
-                if (!running) {
+                entry = message_queue.PopWait();
+                if (entry.final_entry) {
                     break;
                 }
                 write_logs(entry);
             }
+
             // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
             // where a system is repeatedly spamming logs even on close.
             const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
     }
 
     ~Impl() {
-        running = false;
-        message_cv.notify_one();
+        Entry entry;
+        entry.final_entry = true;
+        message_queue.Push(entry);
         backend_thread.join();
     }
 
-    std::atomic_bool running{true};
-    std::mutex message_mutex, writing_mutex;
-    std::condition_variable message_cv;
+    /// Assembles an Entry from the call-site details, timestamped relative to time_origin.
+    Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
+                      const char* function, std::string message) const {
+        namespace chrono = std::chrono;
+        const auto elapsed = chrono::steady_clock::now() - time_origin;
+
+        Entry result;
+        result.timestamp = chrono::duration_cast<chrono::microseconds>(elapsed);
+        result.log_class = log_class;
+        result.log_level = log_level;
+        result.filename = Common::TrimSourcePath(filename);
+        result.line_num = line_nr;
+        result.function = function;
+        result.message = std::move(message);
+
+        return result;
+    }
+
+    std::mutex writing_mutex;
     std::thread backend_thread;
     std::vector<std::unique_ptr<Backend>> backends;
     Common::MPSCQueue<Log::Entry> message_queue;
     Filter filter;
+    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
 };
 
 void ConsoleBackend::Write(const Entry& entry) {
@@ -232,6 +248,7 @@ void DebuggerBackend::Write(const Entry& entry) {
     CLS(Render)                                                                                    \
     SUB(Render, Software)                                                                          \
     SUB(Render, OpenGL)                                                                            \
+    SUB(Render, Vulkan)                                                                            \
     CLS(Audio)                                                                                     \
     SUB(Audio, DSP)                                                                                \
     SUB(Audio, Sink)                                                                               \
@@ -275,25 +292,6 @@ const char* GetLevelName(Level log_level) {
 #undef LVL
 }
 
-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message) {
-    using std::chrono::duration_cast;
-    using std::chrono::steady_clock;
-
-    static steady_clock::time_point time_origin = steady_clock::now();
-
-    Entry entry;
-    entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
-    entry.log_class = log_class;
-    entry.log_level = log_level;
-    entry.filename = Common::TrimSourcePath(filename);
-    entry.line_num = line_nr;
-    entry.function = function;
-    entry.message = std::move(message);
-
-    return entry;
-}
-
 void SetGlobalFilter(const Filter& filter) {
     Impl::Instance().SetGlobalFilter(filter);
 }
@@ -318,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
     if (!filter.CheckMessage(log_class, log_level))
         return;
 
-    Entry entry =
-        CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
-
-    instance.PushEntry(std::move(entry));
+    instance.PushEntry(log_class, log_level, filename, line_num, function,
+                       fmt::vformat(format, args));
 }
 } // namespace Log
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 91bb0c309..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,6 +27,7 @@ struct Entry {
     unsigned int line_num;
     std::string function;
     std::string message;
+    bool final_entry = false;
 
     Entry() = default;
     Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
  */
 const char* GetLevelName(Level log_level);
 
-/// Creates a log entry by formatting the given source location, and message.
-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message);
-
 /**
  * The global filter will prevent any messages from even being processed if they are filtered. Each
  * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index d4ec31ec3..8ed6d5050 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
     Render,            ///< Emulator video output and hardware acceleration
     Render_Software,   ///< Software renderer backend
     Render_OpenGL,     ///< OpenGL backend
+    Render_Vulkan,     ///< Vulkan backend
     Audio,             ///< Audio emulation
     Audio_DSP,         ///< The HLE implementation of the DSP
     Audio_Sink,        ///< Emulator audio output backend
diff --git a/src/common/lz4_compression.cpp b/src/common/lz4_compression.cpp
new file mode 100644
index 000000000..ade6759bb
--- /dev/null
+++ b/src/common/lz4_compression.cpp
@@ -0,0 +1,76 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <lz4hc.h>
+
+#include "common/assert.h"
+#include "common/lz4_compression.h"
+
+namespace Common::Compression {
+
+std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size) {
+    ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
+
+    // The LZ4 API works with int sizes and char buffers.
+    const int input_len = static_cast<int>(source_size);
+    const int worst_case_len = LZ4_compressBound(input_len);
+    const auto* const input = reinterpret_cast<const char*>(source);
+
+    // Reserve the worst case up front, then trim to what LZ4 actually produced.
+    std::vector<u8> output(worst_case_len);
+    auto* const output_chars = reinterpret_cast<char*>(output.data());
+    const int written = LZ4_compress_default(input, output_chars, input_len, worst_case_len);
+
+    // A non-positive result signals failure, reported to the caller as an empty vector.
+    if (written <= 0) {
+        return {};
+    }
+    output.resize(written);
+    return output;
+}
+
+std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size,
+                                  s32 compression_level) {
+    ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
+
+    // Out-of-range levels are pulled into the range LZ4HC supports rather than rejected.
+    const s32 level = std::clamp(compression_level, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
+
+    const int input_len = static_cast<int>(source_size);
+    const int worst_case_len = LZ4_compressBound(input_len);
+    const auto* const input = reinterpret_cast<const char*>(source);
+
+    // Reserve the worst case up front, then trim to what LZ4HC actually produced.
+    std::vector<u8> output(worst_case_len);
+    auto* const output_chars = reinterpret_cast<char*>(output.data());
+    const int written = LZ4_compress_HC(input, output_chars, input_len, worst_case_len, level);
+
+    // A non-positive result signals failure, reported to the caller as an empty vector.
+    if (written <= 0) {
+        return {};
+    }
+    output.resize(written);
+    return output;
+}
+
+std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size) {
+    return CompressDataLZ4HC(source, source_size, LZ4HC_CLEVEL_MAX); // strongest LZ4HC level
+}
+
+std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed,
+                                  std::size_t uncompressed_size) {
+    std::vector<u8> output(uncompressed_size);
+    const auto* const input = reinterpret_cast<const char*>(compressed.data());
+    auto* const output_chars = reinterpret_cast<char*>(output.data());
+    const int input_len = static_cast<int>(compressed.size());
+    const int expected_len = static_cast<int>(uncompressed_size);
+    // Success means LZ4 produced exactly the number of bytes the caller announced.
+    if (LZ4_decompress_safe(input, output_chars, input_len, expected_len) != expected_len) {
+        return {};
+    }
+    return output;
+}
+
+} // namespace Common::Compression
diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h
new file mode 100644
index 000000000..fe2231a6c
--- /dev/null
+++ b/src/common/lz4_compression.h
@@ -0,0 +1,55 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common::Compression {
+
+/**
+ * Compresses a source memory region with LZ4 and returns the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size);
+
+/**
+ * Utilizes the LZ4 subalgorithm LZ4HC with the specified compression level. Higher compression
+ * levels result in a smaller compressed size, but require more CPU time for compression. The
+ * compression level has almost no impact on decompression speed. Data compressed with LZ4HC can
+ * also be decompressed with the default LZ4 decompression.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ * @param compression_level the level, ideally between 3 and 12; out-of-range values are clamped.
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size, s32 compression_level);
+
+/**
+ * Utilizes the LZ4 subalgorithm LZ4HC with the highest possible compression level.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size);
+
+/**
+ * Decompresses a source memory region with LZ4 and returns the uncompressed data in a vector.
+ *
+ * @param compressed the compressed source memory region.
+ * @param uncompressed_size the exact size in bytes of the uncompressed data.
+ *
+ * @return the decompressed data, or an empty vector if the decompressed size does not match.
+ */
+std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed, std::size_t uncompressed_size);
+
+} // namespace Common::Compression
+\ No newline at end of file
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
 #include <cstdlib>
 #include <type_traits>
 
-namespace MathUtil {
+namespace Common {
 
 constexpr float PI = 3.14159265f;
 
@@ -41,4 +41,4 @@ struct Rectangle {
     }
 };
 
-} // namespace MathUtil
+} // namespace Common
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
 
-namespace Memory {
+namespace Common {
 
 MemoryHook::~MemoryHook() = default;
 
-} // namespace Memory
+} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
 
 #include "common/common_types.h"
 
-namespace Memory {
+namespace Common {
 
 /**
  * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
 };
 
 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Memory
+} // namespace Common
diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h
new file mode 100644
index 000000000..9cb448f56
--- /dev/null
+++ b/src/common/multi_level_queue.h
@@ -0,0 +1,337 @@
+// Copyright 2019 TuxSH
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <iterator>
+#include <list>
+#include <utility>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Common {
+
+/**
+ * A MultiLevelQueue is a type of priority queue which has the following characteristics:
+ * - iterable through each of its elements.
+ * - back can be obtained.
+ * - O(1) add, lookup (both front and back)
+ * - discrete priorities and a max of 64 priorities (limited domain)
+ * This type of priority queue is normally used for managing threads within a scheduler
+ */
+template <typename T, std::size_t Depth>
+class MultiLevelQueue {
+public:
+    using value_type = T;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+
+    using difference_type = typename std::pointer_traits<pointer>::difference_type;
+    using size_type = std::size_t;
+
+    template <bool is_constant>
+    class iterator_impl {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = T;
+        using pointer = std::conditional_t<is_constant, const T*, T*>;
+        using reference = std::conditional_t<is_constant, const T&, T&>;
+        using difference_type = typename std::pointer_traits<pointer>::difference_type;
+
+        friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
+            if (lhs.IsEnd() && rhs.IsEnd())
+                return true;
+            return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
+        }
+
+        friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
+            return !operator==(lhs, rhs);
+        }
+
+        reference operator*() const {
+            return *it;
+        }
+
+        pointer operator->() const {
+            return it.operator->();
+        }
+
+        iterator_impl& operator++() {
+            if (IsEnd()) {
+                return *this;
+            }
+
+            ++it;
+
+            if (it == GetEndItForPrio()) {
+                u64 prios = mlq.used_priorities;
+                prios &= ~((2ULL << current_priority) - 1); // avoids a 64-bit shift at level 63
+                if (prios == 0) {
+                    current_priority = static_cast<u32>(mlq.depth());
+                } else {
+                    current_priority = CountTrailingZeroes64(prios);
+                    it = GetBeginItForPrio();
+                }
+            }
+            return *this;
+        }
+
+        iterator_impl& operator--() {
+            if (IsEnd()) {
+                if (mlq.used_priorities != 0) {
+                    current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities);
+                    it = GetEndItForPrio();
+                    --it;
+                }
+            } else if (it == GetBeginItForPrio()) {
+                u64 prios = mlq.used_priorities;
+                prios &= (1ULL << current_priority) - 1;
+                if (prios != 0) {
+                    current_priority = 63 - CountLeadingZeroes64(prios); // nearest level below
+                    it = GetEndItForPrio();
+                    --it;
+                }
+            } else {
+                --it;
+            }
+            return *this;
+        }
+
+        iterator_impl operator++(int) {
+            const iterator_impl v{*this};
+            ++(*this);
+            return v;
+        }
+
+        iterator_impl operator--(int) {
+            const iterator_impl v{*this};
+            --(*this);
+            return v;
+        }
+
+        // copy construction, plus implicit non-const -> const conversion
+        iterator_impl(const iterator_impl<false>& other)
+            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
+
+        iterator_impl(const iterator_impl<true>& other)
+            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
+
+        iterator_impl& operator=(const iterator_impl<false>& other) {
+            mlq = other.mlq; // NOTE(review): reference member; assigns the queue, not the binding
+            it = other.it;
+            current_priority = other.current_priority;
+            return *this;
+        }
+
+        friend class iterator_impl<true>;
+        iterator_impl() = default;
+
+    private:
+        friend class MultiLevelQueue;
+        using container_ref =
+            std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
+        using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
+                                                 typename std::list<T>::iterator>;
+
+        explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority)
+            : mlq(mlq), it(it), current_priority(current_priority) {}
+        explicit iterator_impl(container_ref mlq, u32 current_priority)
+            : mlq(mlq), it(), current_priority(current_priority) {}
+
+        bool IsEnd() const {
+            return current_priority == mlq.depth();
+        }
+
+        list_iterator GetBeginItForPrio() const {
+            return mlq.levels[current_priority].begin();
+        }
+
+        list_iterator GetEndItForPrio() const {
+            return mlq.levels[current_priority].end();
+        }
+
+        container_ref mlq;
+        list_iterator it;
+        u32 current_priority;
+    };
+
+    using iterator = iterator_impl<false>;
+    using const_iterator = iterator_impl<true>;
+
+    void add(const T& element, u32 priority, bool send_back = true) {
+        if (send_back)
+            levels[priority].push_back(element);
+        else
+            levels[priority].push_front(element);
+        used_priorities |= 1ULL << priority;
+    }
+
+    void remove(const T& element, u32 priority) {
+        auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].end())
+            return;
+        levels[priority].erase(it);
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
+        remove(element, old_priority);
+        add(element, new_priority, !adjust_front);
+    }
+    void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
+        adjust(*it, old_priority, new_priority, adjust_front);
+    }
+
+    void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
+        const auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].end())
+            return; // not queued at this priority; splicing end() would be undefined
+        ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority], it);
+        other.used_priorities |= 1ULL << priority;
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
+        transfer_to_front(*it, priority, other);
+    }
+
+    void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
+        const auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].end())
+            return; // not queued at this priority; splicing end() would be undefined
+        ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority], it);
+        other.used_priorities |= 1ULL << priority;
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
+        transfer_to_back(*it, priority, other);
+    }
+
+    void yield(u32 priority, std::size_t n = 1) {
+        ListShiftForward(levels[priority], n);
+    }
+
+    std::size_t depth() const {
+        return Depth;
+    }
+
+    std::size_t size(u32 priority) const {
+        return levels[priority].size();
+    }
+
+    std::size_t size() const {
+        u64 priorities = used_priorities;
+        std::size_t size = 0;
+        while (priorities != 0) {
+            const u64 current_priority = CountTrailingZeroes64(priorities);
+            size += levels[current_priority].size();
+            priorities &= ~(1ULL << current_priority);
+        }
+        return size;
+    }
+
+    bool empty() const {
+        return used_priorities == 0;
+    }
+
+    bool empty(u32 priority) const {
+        return (used_priorities & (1ULL << priority)) == 0;
+    }
+
+    u32 highest_priority_set(u32 max_priority = 0) const {
+        const u64 priorities =
+            max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
+        return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
+    }
+
+    u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
+        const u64 priorities = min_priority >= Depth - 1
+                                   ? used_priorities
+                                   : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
+        return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities);
+    }
+
+    const_iterator cbegin(u32 max_prio = 0) const {
+        const u32 priority = highest_priority_set(max_prio);
+        return priority == Depth ? cend()
+                                 : const_iterator{*this, levels[priority].cbegin(), priority};
+    }
+    const_iterator begin(u32 max_prio = 0) const {
+        return cbegin(max_prio);
+    }
+    iterator begin(u32 max_prio = 0) {
+        const u32 priority = highest_priority_set(max_prio);
+        return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
+    }
+
+    const_iterator cend(u32 min_prio = Depth - 1) const {
+        return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
+    }
+    const_iterator end(u32 min_prio = Depth - 1) const {
+        return cend(min_prio);
+    }
+    iterator end(u32 min_prio = Depth - 1) {
+        return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
+    }
+
+    T& front(u32 max_priority = 0) {
+        const u32 priority = highest_priority_set(max_priority);
+        return levels[priority == Depth ? 0 : priority].front();
+    }
+    const T& front(u32 max_priority = 0) const {
+        const u32 priority = highest_priority_set(max_priority);
+        return levels[priority == Depth ? 0 : priority].front();
+    }
+
+    T back(u32 min_priority = Depth - 1) {
+        const u32 priority = lowest_priority_set(min_priority); // intended
+        return levels[priority == Depth ? Depth - 1 : priority].back();
+    }
+    const T& back(u32 min_priority = Depth - 1) const {
+        const u32 priority = lowest_priority_set(min_priority); // intended
+        return levels[priority == Depth ? Depth - 1 : priority].back();
+    }
+
+private:
+    using const_list_iterator = typename std::list<T>::const_iterator;
+
+    static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
+        if (shift >= list.size()) {
+            return;
+        }
+
+        const auto begin_range = list.begin();
+        const auto end_range = std::next(begin_range, shift);
+        list.splice(list.end(), list, begin_range, end_range);
+    }
+
+    static void ListSplice(std::list<T>& in_list, const_list_iterator position,
+                           std::list<T>& out_list, const_list_iterator element) {
+        in_list.splice(position, out_list, element);
+    }
+
+    static const_list_iterator ListIterateTo(const std::list<T>& list, const T& element) {
+        auto it = list.cbegin();
+        while (it != list.cend() && *it != element) {
+            ++it;
+        }
+        return it;
+    }
+
+    std::array<std::list<T>, Depth> levels;
+    u64 used_priorities = 0;
+};
+
+} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..69b7abc54
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,31 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/page_table.h"
+
+namespace Common {
+
+PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
+
+PageTable::~PageTable() = default;
+
+void PageTable::Resize(std::size_t address_space_width_in_bits) {
+    const std::size_t page_index_bits = address_space_width_in_bits - page_size_in_bits;
+    const std::size_t entry_total = 1ULL << page_index_bits;
+
+    pointers.resize(entry_total);
+    attributes.resize(entry_total);
+    backing_addr.resize(entry_total);
+
+    // A page table starts out sized for the default 39-bit address space, roughly a 1GB
+    // allocation. Shrinking a vector with resize() is not guaranteed to release capacity, so
+    // a 36-bit title could keep up to ~800 MB allocated but unused. shrink_to_fit() asks each
+    // vector to drop the excess capacity.
+
+    pointers.shrink_to_fit();
+    attributes.shrink_to_fit();
+    backing_addr.shrink_to_fit();
+}
+
+} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8b8ff0bb8
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,84 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include "common/common_types.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+
+enum class PageType : u8 {
+    /// Page is unmapped and should cause an access error.
+    Unmapped,
+    /// Page is mapped to regular memory. This is the only type you can get pointers to.
+    Memory,
+    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
+    /// invalidation
+    RasterizerCachedMemory,
+    /// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
+    Special,
+    /// Page is allocated for use.
+    Allocated,
+};
+
+struct SpecialRegion { // Element type of the sets stored in PageTable::special_regions
+    enum class Type {
+        DebugHook,
+        IODevice,
+    } type;
+
+    MemoryHookPointer handler;
+
+    bool operator<(const SpecialRegion& other) const { // lexicographic on (type, handler)
+        return std::tie(type, handler) < std::tie(other.type, other.handler);
+    }
+
+    bool operator==(const SpecialRegion& other) const { // equal when type and handler both match
+        return std::tie(type, handler) == std::tie(other.type, other.handler);
+    }
+};
+
+/**
+ * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
+ * mimics the way a real CPU page table works.
+ */
+struct PageTable {
+    explicit PageTable(std::size_t page_size_in_bits);
+    ~PageTable();
+
+    /**
+     * Resizes the page table to be able to accommodate enough pages within
+     * a given address space.
+     *
+     * @param address_space_width_in_bits The address size width in bits.
+     */
+    void Resize(std::size_t address_space_width_in_bits);
+
+    /**
+     * Vector of memory pointers backing each page. An entry can only be non-null if the
+     * corresponding entry in the `attributes` vector is of type `Memory`.
+     */
+    std::vector<u8*> pointers;
+
+    /**
+     * Contains MMIO handlers that back memory regions whose entries in the `attributes` vector
+     * are of type `Special`.
+     */
+    boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;
+
+    /**
+     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
+     * the corresponding entry in `pointers` MUST be set to null.
+     */
+    std::vector<PageType> attributes;
+
+    std::vector<u64> backing_addr; // Resize() gives it one entry per page, like `pointers`
+
+    const std::size_t page_size_in_bits{};
+};
+
+} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
 
 #include "common/vector_math.h"
 
-namespace Math {
+namespace Common {
 
 template <typename T>
 class Quaternion {
 public:
-    Math::Vec3<T> xyz;
+    Vec3<T> xyz;
     T w{};
 
     Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
 };
 
 template <typename T>
-auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
+auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
     return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
 }
 
-inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
+inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
     return {axis * std::sin(angle / 2), std::cos(angle / 2)};
 }
 
-} // namespace Math
+} // namespace Common
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 2b1727769..d69038f65 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -11,6 +11,7 @@
 #define BUILD_DATE   "@BUILD_DATE@"
 #define BUILD_FULLNAME "@BUILD_FULLNAME@"
 #define BUILD_VERSION "@BUILD_VERSION@"
+#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
 
 namespace Common {
 
@@ -21,6 +22,7 @@ const char g_build_name[]   = BUILD_NAME;
 const char g_build_date[]   = BUILD_DATE;
 const char g_build_fullname[] = BUILD_FULLNAME;
 const char g_build_version[]  = BUILD_VERSION;
+const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
 
 } // namespace
 
diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h
index af9a9daed..666bf0367 100644
--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -13,5 +13,6 @@ extern const char g_build_name[];
 extern const char g_build_date[];
 extern const char g_build_fullname[];
 extern const char g_build_version[];
+extern const char g_shader_cache_version[];
 
 } // namespace Common
diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h
index baf1f1c9e..1176a72b1 100644
--- a/src/common/scope_exit.h
+++ b/src/common/scope_exit.h
@@ -20,7 +20,7 @@ struct ScopeExitHelper {
 
 template <typename Func>
 ScopeExitHelper<Func> ScopeExit(Func&& func) {
-    return ScopeExitHelper<Func>(std::move(func));
+    return ScopeExitHelper<Func>(std::forward<Func>(func));
 }
 } // namespace detail
 
diff --git a/src/common/swap.h b/src/common/swap.h
index 32af0b6ac..71932c2bb 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -17,19 +17,16 @@
 
 #pragma once
 
+#include <type_traits>
+
 #if defined(_MSC_VER)
 #include <cstdlib>
-#elif defined(__linux__)
-#include <byteswap.h>
-#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) ||                     \
-    defined(__NetBSD__) || defined(__OpenBSD__)
-#include <sys/endian.h>
 #endif
 #include <cstring>
 #include "common/common_types.h"
 
-// GCC 4.6+
-#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+// GCC
+#ifdef __GNUC__
 
 #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +35,7 @@
 #endif
 
 // LLVM/clang
-#elif __clang__
+#elif defined(__clang__)
 
 #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -60,86 +57,49 @@
 namespace Common {
 
 #ifdef _MSC_VER
-inline u16 swap16(u16 _data) {
-    return _byteswap_ushort(_data);
-}
-inline u32 swap32(u32 _data) {
-    return _byteswap_ulong(_data);
-}
-inline u64 swap64(u64 _data) {
-    return _byteswap_uint64(_data);
-}
-#elif defined(ARCHITECTURE_ARM) && (__ARM_ARCH >= 6)
-inline u16 swap16(u16 _data) {
-    u32 data = _data;
-    __asm__("rev16 %0, %1\n" : "=l"(data) : "l"(data));
-    return (u16)data;
-}
-inline u32 swap32(u32 _data) {
-    __asm__("rev %0, %1\n" : "=l"(_data) : "l"(_data));
-    return _data;
-}
-inline u64 swap64(u64 _data) {
-    return ((u64)swap32(_data) << 32) | swap32(_data >> 32);
-}
-#elif __linux__
-inline u16 swap16(u16 _data) {
-    return bswap_16(_data);
-}
-inline u32 swap32(u32 _data) {
-    return bswap_32(_data);
-}
-inline u64 swap64(u64 _data) {
-    return bswap_64(_data);
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
+    return _byteswap_ushort(data);
 }
-#elif __APPLE__
-inline __attribute__((always_inline)) u16 swap16(u16 _data) {
-    return (_data >> 8) | (_data << 8);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return _byteswap_ulong(data);
 }
-inline __attribute__((always_inline)) u32 swap32(u32 _data) {
-    return __builtin_bswap32(_data);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return _byteswap_uint64(data);
 }
-inline __attribute__((always_inline)) u64 swap64(u64 _data) {
-    return __builtin_bswap64(_data);
-}
-#elif defined(__Bitrig__) || defined(__OpenBSD__)
+#elif defined(__clang__) || defined(__GNUC__)
+#if defined(__Bitrig__) || defined(__OpenBSD__)
 // redefine swap16, swap32, swap64 as inline functions
 #undef swap16
 #undef swap32
 #undef swap64
-inline u16 swap16(u16 _data) {
-    return __swap16(_data);
-}
-inline u32 swap32(u32 _data) {
-    return __swap32(_data);
-}
-inline u64 swap64(u64 _data) {
-    return __swap64(_data);
-}
-#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__)
-inline u16 swap16(u16 _data) {
-    return bswap16(_data);
+#endif
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
+    return __builtin_bswap16(data);
 }
-inline u32 swap32(u32 _data) {
-    return bswap32(_data);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return __builtin_bswap32(data);
 }
-inline u64 swap64(u64 _data) {
-    return bswap64(_data);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return __builtin_bswap64(data);
 }
 #else
-// Slow generic implementation.
-inline u16 swap16(u16 data) {
+// Generic implementation.
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
     return (data >> 8) | (data << 8);
 }
-inline u32 swap32(u32 data) {
-    return (swap16(data) << 16) | swap16(data >> 16);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return ((data & 0xFF000000U) >> 24) | ((data & 0x00FF0000U) >> 8) |
+           ((data & 0x0000FF00U) << 8) | ((data & 0x000000FFU) << 24);
 }
-inline u64 swap64(u64 data) {
-    return ((u64)swap32(data) << 32) | swap32(data >> 32);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return ((data & 0xFF00000000000000ULL) >> 56) | ((data & 0x00FF000000000000ULL) >> 40) |
+           ((data & 0x0000FF0000000000ULL) >> 24) | ((data & 0x000000FF00000000ULL) >> 8) |
+           ((data & 0x00000000FF000000ULL) << 8) | ((data & 0x0000000000FF0000ULL) << 24) |
+           ((data & 0x000000000000FF00ULL) << 40) | ((data & 0x00000000000000FFULL) << 56);
 }
 #endif
 
-inline float swapf(float f) {
+[[nodiscard]] inline float swapf(float f) noexcept {
     static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t.");
 
     u32 value;
@@ -151,7 +111,7 @@ inline float swapf(float f) {
     return f;
 }
 
-inline double swapd(double f) {
+[[nodiscard]] inline double swapd(double f) noexcept {
     static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t.");
 
     u64 value;
@@ -170,7 +130,7 @@ struct swap_struct_t {
     using swapped_t = swap_struct_t;
 
 protected:
-    T value = T();
+    T value;
 
     static T swap(T v) {
         return F::swap(v);
@@ -605,52 +565,154 @@ struct swap_double_t {
     }
 };
 
-#if COMMON_LITTLE_ENDIAN
-using u16_le = u16;
-using u32_le = u32;
-using u64_le = u64;
+template <typename T>
+struct swap_enum_t {
+    static_assert(std::is_enum_v<T>);
+    using base = std::underlying_type_t<T>;
 
-using s16_le = s16;
-using s32_le = s32;
-using s64_le = s64;
+public:
+    swap_enum_t() = default;
+    swap_enum_t(const T& v) : value(swap(v)) {}
 
-using float_le = float;
-using double_le = double;
+    swap_enum_t& operator=(const T& v) {
+        value = swap(v);
+        return *this;
+    }
 
-using u64_be = swap_struct_t<u64, swap_64_t<u64>>;
-using s64_be = swap_struct_t<s64, swap_64_t<s64>>;
+    operator T() const {
+        return swap(value);
+    }
 
-using u32_be = swap_struct_t<u32, swap_32_t<u32>>;
-using s32_be = swap_struct_t<s32, swap_32_t<s32>>;
+    explicit operator base() const {
+        return static_cast<base>(swap(value));
+    }
 
-using u16_be = swap_struct_t<u16, swap_16_t<u16>>;
-using s16_be = swap_struct_t<s16, swap_16_t<s16>>;
+protected:
+    T value{};
+    // clang-format off
+    using swap_t = std::conditional_t<
+        std::is_same_v<base, u16>, swap_16_t<u16>, std::conditional_t<
+        std::is_same_v<base, s16>, swap_16_t<s16>, std::conditional_t<
+        std::is_same_v<base, u32>, swap_32_t<u32>, std::conditional_t<
+        std::is_same_v<base, s32>, swap_32_t<s32>, std::conditional_t<
+        std::is_same_v<base, u64>, swap_64_t<u64>, std::conditional_t<
+        std::is_same_v<base, s64>, swap_64_t<s64>, void>>>>>>;
+    // clang-format on
+    static T swap(T x) {
+        return static_cast<T>(swap_t::swap(static_cast<base>(x)));
+    }
+};
 
-using float_be = swap_struct_t<float, swap_float_t<float>>;
-using double_be = swap_struct_t<double, swap_double_t<double>>;
-#else
+struct SwapTag {}; // Use the different endianness from the system
+struct KeepTag {}; // Use the same endianness as the system
 
-using u64_le = swap_struct_t<u64, swap_64_t<u64>>;
-using s64_le = swap_struct_t<s64, swap_64_t<s64>>;
+template <typename T, typename Tag>
+struct AddEndian;
 
-using u32_le = swap_struct_t<u32, swap_32_t<u32>>;
-using s32_le = swap_struct_t<s32, swap_32_t<s32>>;
+// KeepTag specializations
 
-using u16_le = swap_struct_t<u16, swap_16_t<u16>>;
-using s16_le = swap_struct_t<s16, swap_16_t<s16>>;
+template <typename T>
+struct AddEndian<T, KeepTag> {
+    using type = T;
+};
 
-using float_le = swap_struct_t<float, swap_float_t<float>>;
-using double_le = swap_struct_t<double, swap_double_t<double>>;
+// SwapTag specializations
 
-using u16_be = u16;
-using u32_be = u32;
-using u64_be = u64;
+template <>
+struct AddEndian<u8, SwapTag> {
+    using type = u8;
+};
 
-using s16_be = s16;
-using s32_be = s32;
-using s64_be = s64;
+template <>
+struct AddEndian<u16, SwapTag> {
+    using type = swap_struct_t<u16, swap_16_t<u16>>;
+};
 
-using float_be = float;
-using double_be = double;
+template <>
+struct AddEndian<u32, SwapTag> {
+    using type = swap_struct_t<u32, swap_32_t<u32>>;
+};
+
+template <>
+struct AddEndian<u64, SwapTag> {
+    using type = swap_struct_t<u64, swap_64_t<u64>>;
+};
+
+template <>
+struct AddEndian<s8, SwapTag> {
+    using type = s8;
+};
+
+template <>
+struct AddEndian<s16, SwapTag> {
+    using type = swap_struct_t<s16, swap_16_t<s16>>;
+};
+
+template <>
+struct AddEndian<s32, SwapTag> {
+    using type = swap_struct_t<s32, swap_32_t<s32>>;
+};
+
+template <>
+struct AddEndian<s64, SwapTag> {
+    using type = swap_struct_t<s64, swap_64_t<s64>>;
+};
+
+template <>
+struct AddEndian<float, SwapTag> {
+    using type = swap_struct_t<float, swap_float_t<float>>;
+};
+
+template <>
+struct AddEndian<double, SwapTag> {
+    using type = swap_struct_t<double, swap_double_t<double>>;
+};
+
+template <typename T>
+struct AddEndian<T, SwapTag> {
+    static_assert(std::is_enum_v<T>); // Non-enum types need one of the explicit specializations.
+    using type = swap_enum_t<T>;
+};
+
+// Alias LETag/BETag as KeepTag/SwapTag depending on the system
+#if COMMON_LITTLE_ENDIAN
+
+using LETag = KeepTag;
+using BETag = SwapTag;
+
+#else
+
+using BETag = KeepTag;
+using LETag = SwapTag;
 
 #endif
+
+// Aliases for LE types
+using u16_le = AddEndian<u16, LETag>::type;
+using u32_le = AddEndian<u32, LETag>::type;
+using u64_le = AddEndian<u64, LETag>::type;
+
+using s16_le = AddEndian<s16, LETag>::type;
+using s32_le = AddEndian<s32, LETag>::type;
+using s64_le = AddEndian<s64, LETag>::type;
+
+template <typename T>
+using enum_le = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, LETag>::type>;
+
+using float_le = AddEndian<float, LETag>::type;
+using double_le = AddEndian<double, LETag>::type;
+
+// Aliases for BE types
+using u16_be = AddEndian<u16, BETag>::type;
+using u32_be = AddEndian<u32, BETag>::type;
+using u64_be = AddEndian<u64, BETag>::type;
+
+using s16_be = AddEndian<s16, BETag>::type;
+using s32_be = AddEndian<s32, BETag>::type;
+using s64_be = AddEndian<s64, BETag>::type;
+
+template <typename T>
+using enum_be = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, BETag>::type>;
+
+using float_be = AddEndian<float, BETag>::type;
+using double_be = AddEndian<double, BETag>::type;
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 5144c0d9f..fe7a420cc 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -27,18 +27,6 @@ namespace Common {
 
 #ifdef _MSC_VER
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
-    SetThreadAffinityMask(thread, mask);
-}
-
-void SetCurrentThreadAffinity(u32 mask) {
-    SetThreadAffinityMask(GetCurrentThread(), mask);
-}
-
-void SwitchCurrentThread() {
-    SwitchToThread();
-}
-
 // Sets the debugger-visible name of the current thread.
 // Uses undocumented (actually, it is now documented) trick.
 // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
@@ -70,31 +58,6 @@ void SetCurrentThreadName(const char* name) {
 
 #else // !MSVC_VER, so must be POSIX threads
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
-#ifdef __APPLE__
-    thread_policy_set(pthread_mach_thread_np(thread), THREAD_AFFINITY_POLICY, (integer_t*)&mask, 1);
-#elif (defined __linux__ || defined __FreeBSD__) && !(defined ANDROID)
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-
-    for (int i = 0; i != sizeof(mask) * 8; ++i)
-        if ((mask >> i) & 1)
-            CPU_SET(i, &cpu_set);
-
-    pthread_setaffinity_np(thread, sizeof(cpu_set), &cpu_set);
-#endif
-}
-
-void SetCurrentThreadAffinity(u32 mask) {
-    SetThreadAffinity(pthread_self(), mask);
-}
-
-#ifndef _WIN32
-void SwitchCurrentThread() {
-    usleep(1000 * 1);
-}
-#endif
-
 // MinGW with the POSIX threading model does not support pthread_setname_np
 #if !defined(_WIN32) || defined(_MSC_VER)
 void SetCurrentThreadName(const char* name) {
diff --git a/src/common/thread.h b/src/common/thread.h
index 2cf74452d..0cfd98be6 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,14 +9,13 @@
 #include <cstddef>
 #include <mutex>
 #include <thread>
-#include "common/common_types.h"
 
 namespace Common {
 
 class Event {
 public:
     void Set() {
-        std::lock_guard<std::mutex> lk(mutex);
+        std::lock_guard lk{mutex};
         if (!is_set) {
             is_set = true;
             condvar.notify_one();
@@ -24,14 +23,14 @@ public:
     }
 
     void Wait() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         condvar.wait(lk, [&] { return is_set; });
         is_set = false;
     }
 
     template <class Clock, class Duration>
     bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         if (!condvar.wait_until(lk, time, [this] { return is_set; }))
             return false;
         is_set = false;
@@ -39,7 +38,7 @@ public:
     }
 
     void Reset() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         // no other action required, since wait loops on the predicate and any lingering signal will
         // get cleared on the first iteration
         is_set = false;
@@ -57,7 +56,7 @@ public:
 
     /// Blocks until all "count" threads have called Sync()
     void Sync() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         const std::size_t current_generation = generation;
 
         if (++waiting == count) {
@@ -78,9 +77,6 @@ private:
     std::size_t generation = 0; // Incremented once each time the barrier is used
 };
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
-void SetCurrentThreadAffinity(u32 mask);
-void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
 void SetCurrentThreadName(const char* name);
 
 } // namespace Common
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index e7594db68..791f99a8c 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,7 +6,7 @@
 
+#include <algorithm>
 #include <array>
 #include <deque>
-#include <boost/range/algorithm_ext/erase.hpp>
 
 namespace Common {
 
@@ -111,8 +111,9 @@ struct ThreadQueueList {
     }
 
     void remove(Priority priority, const T& thread_id) {
-        Queue* cur = &queues[priority];
-        boost::remove_erase(cur->data, thread_id);
+        Queue* const cur = &queues[priority];
+        const auto iter = std::remove(cur->data.begin(), cur->data.end(), thread_id);
+        cur->data.erase(iter, cur->data.end());
     }
 
     void rotate(Priority priority) {
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index edf13bc49..e714ba5b3 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -7,17 +7,17 @@
 // a simple lockless thread-safe,
 // single reader, single writer queue
 
-#include <algorithm>
 #include <atomic>
+#include <condition_variable>
 #include <cstddef>
 #include <mutex>
-#include "common/common_types.h"
+#include <utility>
 
 namespace Common {
-template <typename T, bool NeedSize = true>
+template <typename T>
 class SPSCQueue {
 public:
-    SPSCQueue() : size(0) {
+    SPSCQueue() {
         write_ptr = read_ptr = new ElementPtr();
     }
     ~SPSCQueue() {
@@ -25,13 +25,12 @@ public:
         delete read_ptr;
     }
 
-    u32 Size() const {
-        static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
+    std::size_t Size() const {
         return size.load();
     }
 
     bool Empty() const {
-        return !read_ptr->next.load();
+        return Size() == 0;
     }
 
     T& Front() const {
@@ -47,13 +46,14 @@ public:
         ElementPtr* new_ptr = new ElementPtr();
         write_ptr->next.store(new_ptr, std::memory_order_release);
         write_ptr = new_ptr;
-        if (NeedSize)
-            size++;
+        ++size; // Publish the new element before waking the consumer.
+        std::lock_guard lock{cv_mutex}; // Held so PopWait() cannot miss the wakeup.
+        cv.notify_one();
     }
 
     void Pop() {
-        if (NeedSize)
-            size--;
+        --size;
+
         ElementPtr* tmpptr = read_ptr;
         // advance the read pointer
         read_ptr = tmpptr->next.load();
@@ -66,8 +66,7 @@ public:
         if (Empty())
             return false;
 
-        if (NeedSize)
-            size--;
+        --size;
 
         ElementPtr* tmpptr = read_ptr;
         read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -77,6 +76,16 @@ public:
         return true;
     }
 
+    T PopWait() {
+        if (Empty()) { // Only take the mutex when there is nothing to pop yet.
+            std::unique_lock guard{cv_mutex};
+            cv.wait(guard, [this] { return !Empty(); });
+        }
+        T popped;
+        Pop(popped);
+        return popped;
+    }
+
     // not thread-safe
     void Clear() {
         size.store(0);
@@ -89,7 +98,7 @@ private:
     // and a pointer to the next ElementPtr
     class ElementPtr {
     public:
-        ElementPtr() : next(nullptr) {}
+        ElementPtr() {}
         ~ElementPtr() {
             ElementPtr* next_ptr = next.load();
 
@@ -98,21 +107,23 @@ private:
         }
 
         T current;
-        std::atomic<ElementPtr*> next;
+        std::atomic<ElementPtr*> next{nullptr};
     };
 
     ElementPtr* write_ptr;
     ElementPtr* read_ptr;
-    std::atomic<u32> size;
+    std::atomic_size_t size{0};
+    std::mutex cv_mutex;
+    std::condition_variable cv;
 };
 
 // a simple thread-safe,
 // single reader, multiple writer queue
 
-template <typename T, bool NeedSize = true>
+template <typename T>
 class MPSCQueue {
 public:
-    u32 Size() const {
+    std::size_t Size() const {
         return spsc_queue.Size();
     }
 
@@ -126,7 +137,7 @@ public:
 
     template <typename Arg>
     void Push(Arg&& t) {
-        std::lock_guard<std::mutex> lock(write_lock);
+        std::lock_guard lock{write_lock};
         spsc_queue.Push(t);
     }
 
@@ -138,13 +149,17 @@ public:
         return spsc_queue.Pop(t);
     }
 
+    T PopWait() { // Forwards to the wrapped SPSCQueue; write_lock is not taken.
+        return spsc_queue.PopWait();
+    }
+
     // not thread-safe
     void Clear() {
         spsc_queue.Clear();
     }
 
 private:
-    SPSCQueue<T, NeedSize> spsc_queue;
+    SPSCQueue<T> spsc_queue;
     std::mutex write_lock;
 };
 } // namespace Common
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..32bf56730
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,45 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) { // Returns the full 128-bit product of a and b.
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]); // Low half is returned, high half goes to result[1].
+#else
+    unsigned __int128 tmp = a; // Widen first so the multiply below is carried out in 128 bits.
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128)); // NOTE(review): layout follows host byte order.
+#endif
+    return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) { // Returns {quotient, remainder}.
+    u64 remainder = dividend[0] % divisor; // NOTE(review): divisor == 0 is not checked.
+    u64 accum = dividend[0] / divisor;
+    if (dividend[1] == 0)
+        return {accum, remainder}; // Upper element is zero: the 64-bit division is the answer.
+    // The dividend[1] / divisor part of the quotient is dropped: it would not fit in 64 bits
+    const u64 first_segment = (dividend[1] % divisor) << 32;
+    accum += (first_segment / divisor) << 32;
+    const u64 second_segment = (first_segment % divisor) << 32;
+    accum += (second_segment / divisor);
+    remainder += second_segment % divisor;
+    if (remainder >= divisor) { // The two partial remainders can add up to a full divisor.
+        accum++;
+        remainder -= divisor;
+    }
+    return {accum, remainder};
+}
+
+} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..a3be2a2cb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,19 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// Multiplies two u64 values and returns the full 128-bit product as a u128.
+u128 Multiply64Into128(u64 a, u64 b);
+
+// Divides a u128 by a u32 and returns a pair of u64 values:
+// first is the result of the division, second is the remainder.
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
 #include <cmath>
 #include <type_traits>
 
-namespace Math {
+namespace Common {
 
 template <typename T>
 class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
     return MakeVec(x, yzw[0], yzw[1], yzw[2]);
 }
 
-} // namespace Math
+} // namespace Common
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
new file mode 100644
index 000000000..60a35c67c
--- /dev/null
+++ b/src/common/zstd_compression.cpp
@@ -0,0 +1,54 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <zstd.h>
+
+#include "common/assert.h"
+#include "common/zstd_compression.h"
+
+namespace Common::Compression {
+
+std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) {
+    // Out-of-range levels are clamped rather than rejected.
+    compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel());
+
+    // Allocate for the worst case up front; the buffer is shrunk to the real size below.
+    const std::size_t max_compressed_size = ZSTD_compressBound(source_size);
+    std::vector<u8> compressed(max_compressed_size);
+
+    const std::size_t compressed_size =
+        ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level);
+
+    if (ZSTD_isError(compressed_size)) {
+        // Compression failed
+        return {};
+    }
+
+    compressed.resize(compressed_size);
+
+    return compressed;
+}
+
+std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) {
+    return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT);
+}
+
+std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) {
+    // The output size is taken from the size recorded in the compressed data itself.
+    const std::size_t decompressed_size =
+        ZSTD_getDecompressedSize(compressed.data(), compressed.size());
+    std::vector<u8> decompressed(decompressed_size);
+
+    const std::size_t uncompressed_result_size = ZSTD_decompress(
+        decompressed.data(), decompressed.size(), compressed.data(), compressed.size());
+
+    if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) {
+        // Decompression failed
+        return {};
+    }
+    return decompressed;
+}
+
+} // namespace Common::Compression
diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h
new file mode 100644
index 000000000..e0a64b035
--- /dev/null
+++ b/src/common/zstd_compression.h
@@ -0,0 +1,44 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common::Compression {
+
+/**
+ * Compresses a source memory region with Zstandard and returns the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ * @param compression_level the compression level to use; clamped to [1, ZSTD_maxCLevel()].
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level);
+
+/**
+ * Compresses a source memory region with Zstandard with the default compression level and returns
+ * the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size);
+
+/**
+ * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector.
+ *
+ * @param compressed the compressed source memory region.
+ *
+ * @return the decompressed data, or an empty vector if decompression failed.
+ */
+std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed);
+
+} // namespace Common::Compression
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index aa9e05089..c59107102 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -31,6 +31,8 @@ add_library(core STATIC
     file_sys/bis_factory.h
     file_sys/card_image.cpp
     file_sys/card_image.h
+    file_sys/cheat_engine.cpp
+    file_sys/cheat_engine.h
     file_sys/content_archive.cpp
     file_sys/content_archive.h
     file_sys/control_metadata.cpp
@@ -68,6 +70,8 @@ add_library(core STATIC
     file_sys/system_archive/ng_word.h
     file_sys/system_archive/system_archive.cpp
     file_sys/system_archive/system_archive.h
+    file_sys/system_archive/system_version.cpp
+    file_sys/system_archive/system_version.h
     file_sys/vfs.cpp
     file_sys/vfs.h
     file_sys/vfs_concat.cpp
@@ -95,6 +99,8 @@ add_library(core STATIC
     frontend/framebuffer_layout.cpp
     frontend/framebuffer_layout.h
     frontend/input.h
+    frontend/scope_acquire_window_context.cpp
+    frontend/scope_acquire_window_context.h
     gdbstub/gdbstub.cpp
     gdbstub/gdbstub.h
     hle/ipc.h
@@ -105,6 +111,8 @@ add_library(core STATIC
     hle/kernel/client_port.h
     hle/kernel/client_session.cpp
     hle/kernel/client_session.h
+    hle/kernel/code_set.cpp
+    hle/kernel/code_set.h
     hle/kernel/errors.h
     hle/kernel/handle_table.cpp
     hle/kernel/handle_table.h
@@ -138,8 +146,8 @@ add_library(core STATIC
     hle/kernel/svc_wrap.h
     hle/kernel/thread.cpp
     hle/kernel/thread.h
-    hle/kernel/timer.cpp
-    hle/kernel/timer.h
+    hle/kernel/transfer_memory.cpp
+    hle/kernel/transfer_memory.h
     hle/kernel/vm_manager.cpp
     hle/kernel/vm_manager.h
     hle/kernel/wait_object.cpp
@@ -217,6 +225,7 @@ add_library(core STATIC
     hle/service/audio/audren_u.h
     hle/service/audio/codecctl.cpp
     hle/service/audio/codecctl.h
+    hle/service/audio/errors.h
     hle/service/audio/hwopus.cpp
     hle/service/audio/hwopus.h
     hle/service/bcat/bcat.cpp
@@ -400,6 +409,10 @@ add_library(core STATIC
     hle/service/time/time.h
     hle/service/usb/usb.cpp
     hle/service/usb/usb.h
+    hle/service/vi/display/vi_display.cpp
+    hle/service/vi/display/vi_display.h
+    hle/service/vi/layer/vi_layer.cpp
+    hle/service/vi/layer/vi_layer.h
     hle/service/vi/vi.cpp
     hle/service/vi/vi.h
     hle/service/vi/vi_m.cpp
@@ -414,8 +427,6 @@ add_library(core STATIC
     loader/deconstructed_rom_directory.h
     loader/elf.cpp
     loader/elf.h
-    loader/linker.cpp
-    loader/linker.h
     loader/loader.cpp
     loader/loader.h
     loader/nax.cpp
@@ -432,8 +443,6 @@ add_library(core STATIC
     loader/xci.h
     memory.cpp
     memory.h
-    memory_hook.cpp
-    memory_hook.h
     memory_setup.h
     perf_stats.cpp
     perf_stats.h
@@ -449,7 +458,7 @@ add_library(core STATIC
 create_target_directory_groups(core)
 
 target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
-target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt lz4_static mbedtls opus unicorn open_source_archives)
+target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt mbedtls opus unicorn open_source_archives)
 if (ENABLE_WEB_SERVICE)
     target_compile_definitions(core PRIVATE -DENABLE_WEB_SERVICE)
     target_link_libraries(core PRIVATE web_service)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index afbda8d8b..dc96e35d5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,7 +12,9 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/vm_manager.h"
@@ -25,7 +27,6 @@ using Vector = Dynarmic::A64::Vector;
 class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks {
 public:
     explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {}
-    ~ARM_Dynarmic_Callbacks() = default;
 
     u8 MemoryRead8(u64 vaddr) override {
         return Memory::Read8(vaddr);
@@ -99,7 +100,7 @@ public:
     }
 
     void CallSVC(u32 swi) override {
-        Kernel::CallSVC(swi);
+        Kernel::CallSVC(parent.system, swi);
     }
 
     void AddTicks(u64 ticks) override {
@@ -112,14 +113,14 @@ public:
         // Always execute at least one tick.
         amortized_ticks = std::max<u64>(amortized_ticks, 1);
 
-        CoreTiming::AddTicks(amortized_ticks);
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
         num_interpreted_instructions = 0;
     }
     u64 GetTicksRemaining() override {
-        return std::max(CoreTiming::GetDowncount(), 0);
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return CoreTiming::GetTicks();
+        return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
     }
 
     ARM_Dynarmic& parent;
@@ -129,7 +130,7 @@ public:
 };
 
 std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
-    auto* current_process = Core::CurrentProcess();
+    auto* current_process = system.Kernel().CurrentProcess();
     auto** const page_table = current_process->VMManager().page_table.pointers.data();
 
     Dynarmic::A64::UserConfig config;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     config.tpidr_el0 = &cb->tpidr_el0;
     config.dczid_el0 = 4;
     config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;
 
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
@@ -163,7 +164,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)
 
 void ARM_Dynarmic::Run() {
     MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
-    ASSERT(Memory::GetCurrentPageTable() == current_page_table);
 
     jit->Run();
 }
@@ -172,8 +172,10 @@ void ARM_Dynarmic::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
+ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
+                           std::size_t core_index)
+    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
+      core_index{core_index}, system{system},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
     ThreadContext ctx{};
     inner_unicorn.SaveContext(ctx);
@@ -276,7 +278,6 @@ void ARM_Dynarmic::ClearExclusiveState() {
 
 void ARM_Dynarmic::PageTableChanged() {
     jit = MakeJit();
-    current_page_table = Memory::GetCurrentPageTable();
 }
 
 DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 512bf8ce9..c1db254e8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,19 +12,16 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 
-namespace Memory {
-struct PageTable;
-}
-
 namespace Core {
 
 class ARM_Dynarmic_Callbacks;
 class DynarmicExclusiveMonitor;
+class System;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
-    ~ARM_Dynarmic();
+    ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ~ARM_Dynarmic() override;
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
@@ -62,15 +59,14 @@ private:
     ARM_Unicorn inner_unicorn;
 
     std::size_t core_index;
+    System& system;
     DynarmicExclusiveMonitor& exclusive_monitor;
-
-    Memory::PageTable* current_page_table = nullptr;
 };
 
 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
     explicit DynarmicExclusiveMonitor(std::size_t core_count);
-    ~DynarmicExclusiveMonitor();
+    ~DynarmicExclusiveMonitor() override;
 
     void SetExclusive(std::size_t core_index, VAddr addr) override;
     void ClearExclusive() override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c455c81fb..4e07fe8b5 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -10,7 +10,6 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/svc.h"
-#include "core/memory.h"
 
 namespace Core {
 
@@ -49,20 +48,6 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
     }
 }
 
-static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
-    u32 esr{};
-    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
-
-    auto ec = esr >> 26;
-    auto iss = esr & 0xFFFFFF;
-
-    switch (ec) {
-    case 0x15: // SVC
-        Kernel::CallSVC(iss);
-        break;
-    }
-}
-
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
     ARM_Interface::ThreadContext ctx{};
@@ -72,7 +57,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return {};
 }
 
-ARM_Unicorn::ARM_Unicorn() {
+ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
     CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
 
     auto fpv = 3 << 20;
@@ -177,7 +162,7 @@ void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000, 0));
     } else {
-        ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
+        ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0));
     }
 }
 
@@ -190,14 +175,15 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
     MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
-    CoreTiming::AddTicks(num_instructions);
+    system.CoreTiming().AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
-        if (last_bkpt_hit) {
+        if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
         }
+
         Kernel::Thread* thread = Kernel::GetCurrentThread();
         SaveContext(thread->GetContext());
-        if (last_bkpt_hit || GDBStub::GetCpuStepFlag()) {
+        if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
             last_bkpt_hit = false;
             GDBStub::Break();
             GDBStub::SendTrap(thread, 5);
@@ -272,4 +258,20 @@ void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
     last_bkpt_hit = true;
 }
 
+void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) {
+    u32 esr{};
+    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
+
+    const auto ec = esr >> 26;
+    const auto iss = esr & 0xFFFFFF;
+
+    auto* const arm_instance = static_cast<ARM_Unicorn*>(user_data);
+
+    switch (ec) {
+    case 0x15: // SVC
+        Kernel::CallSVC(arm_instance->system, iss);
+        break;
+    }
+}
+
 } // namespace Core
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 75761950b..209fc16ad 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -11,10 +11,13 @@
 
 namespace Core {
 
+class System;
+
 class ARM_Unicorn final : public ARM_Interface {
 public:
-    ARM_Unicorn();
-    ~ARM_Unicorn();
+    explicit ARM_Unicorn(System& system);
+    ~ARM_Unicorn() override;
+
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
     void UnmapMemory(VAddr address, std::size_t size) override;
@@ -42,9 +45,12 @@ public:
     void RecordBreak(GDBStub::BreakpointAddress bkpt);
 
 private:
+    static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
+
     uc_engine* uc{};
+    System& system;
     GDBStub::BreakpointAddress last_bkpt{};
-    bool last_bkpt_hit;
+    bool last_bkpt_hit = false;
 };
 
 } // namespace Core
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 572814e4b..bc9e887b6 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -17,6 +17,7 @@
 #include "core/core_timing.h"
 #include "core/cpu_core_manager.h"
 #include "core/file_sys/mode.h"
+#include "core/file_sys/registered_cache.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
 #include "core/gdbstub/gdbstub.h"
@@ -32,11 +33,13 @@
 #include "core/perf_stats.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
+#include "file_sys/cheat_engine.h"
 #include "frontend/applets/profile_select.h"
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
 #include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
@@ -78,6 +81,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
     return vfs->OpenFile(path, FileSys::Mode::Read);
 }
 struct System::Impl {
+    explicit Impl(System& system) : kernel{system} {}
 
     Cpu& CurrentCpuCore() {
         return cpu_core_manager.GetCurrentCore();
@@ -94,7 +98,7 @@ struct System::Impl {
     ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
         LOG_DEBUG(HW_Memory, "initialized OK");
 
-        CoreTiming::Init();
+        core_timing.Initialize();
         kernel.Initialize();
 
         const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -105,6 +109,8 @@ struct System::Impl {
         // Create a default fs if one doesn't already exist.
         if (virtual_filesystem == nullptr)
             virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>();
+        if (content_provider == nullptr)
+            content_provider = std::make_unique<FileSys::ContentProviderUnion>();
 
         /// Create default implementations of applets if one is not provided.
         if (profile_selector == nullptr)
@@ -114,24 +120,30 @@ struct System::Impl {
         if (web_browser == nullptr)
             web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
 
-        auto main_process = Kernel::Process::Create(kernel, "main");
+        auto main_process = Kernel::Process::Create(system, "main");
         kernel.MakeCurrentProcess(main_process.get());
 
         telemetry_session = std::make_unique<Core::TelemetrySession>();
         service_manager = std::make_shared<Service::SM::ServiceManager>();
 
-        Service::Init(service_manager, *virtual_filesystem);
+        Service::Init(service_manager, system, *virtual_filesystem);
         GDBStub::Init();
 
-        renderer = VideoCore::CreateRenderer(emu_window);
+        renderer = VideoCore::CreateRenderer(emu_window, system);
         if (!renderer->Init()) {
             return ResultStatus::ErrorVideoCore;
         }
 
-        gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
+        is_powered_on = true;
+
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
+        } else {
+            gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
+        }
 
         cpu_core_manager.Initialize(system);
-        is_powered_on = true;
+
         LOG_DEBUG(Core, "Initialized OK");
 
         // Reset counters and set time origin to current frame
@@ -175,19 +187,20 @@ struct System::Impl {
             return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
                                              static_cast<u32>(load_result));
         }
+
         status = ResultStatus::Success;
         return status;
     }
 
     void Shutdown() {
         // Log last frame performance stats
-        auto perf_results = GetAndResetPerfStats();
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
-                             perf_results.emulation_speed * 100.0);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
-                             perf_results.game_fps);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
-                             perf_results.frametime * 1000.0);
+        const auto perf_results = GetAndResetPerfStats();
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
+                                    perf_results.emulation_speed * 100.0);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
+                                    perf_results.game_fps);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
+                                    perf_results.frametime * 1000.0);
 
         is_powered_on = false;
 
@@ -196,6 +209,7 @@ struct System::Impl {
         GDBStub::Shutdown();
         Service::Shutdown();
         service_manager.reset();
+        cheat_engine.reset();
         telemetry_session.reset();
         gpu_core.reset();
 
@@ -204,7 +218,7 @@ struct System::Impl {
 
         // Shutdown kernel and core timing
         kernel.Shutdown();
-        CoreTiming::Shutdown();
+        core_timing.Shutdown();
 
         // Close app loader
         app_loader.reset();
@@ -231,12 +245,15 @@ struct System::Impl {
     }
 
     PerfStatsResults GetAndResetPerfStats() {
-        return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
+        return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
     }
 
+    Timing::CoreTiming core_timing;
     Kernel::KernelCore kernel;
     /// RealVfsFilesystem instance
     FileSys::VirtualFilesystem virtual_filesystem;
+    /// ContentProviderUnion instance
+    std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
     /// AppLoader used to load the current executing application
     std::unique_ptr<Loader::AppLoader> app_loader;
     std::unique_ptr<VideoCore::RendererBase> renderer;
@@ -245,6 +262,8 @@ struct System::Impl {
     CpuCoreManager cpu_core_manager;
     bool is_powered_on = false;
 
+    std::unique_ptr<FileSys::CheatEngine> cheat_engine;
+
     /// Frontend applets
     std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector;
     std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
@@ -263,7 +282,7 @@ struct System::Impl {
     Core::FrameLimiter frame_limiter;
 };
 
-System::System() : impl{std::make_unique<Impl>()} {}
+System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;
 
 Cpu& System::CurrentCpuCore() {
@@ -395,6 +414,14 @@ const Kernel::KernelCore& System::Kernel() const {
     return impl->kernel;
 }
 
+Timing::CoreTiming& System::CoreTiming() {
+    return impl->core_timing;
+}
+
+const Timing::CoreTiming& System::CoreTiming() const {
+    return impl->core_timing;
+}
+
 Core::PerfStats& System::GetPerfStats() {
     return impl->perf_stats;
 }
@@ -435,6 +462,13 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
     return impl->debug_context.get();
 }
 
+void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
+                               const std::string& build_id, VAddr code_region_start,
+                               VAddr code_region_end) {
+    impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
+                                                                code_region_start, code_region_end);
+}
+
 void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
     impl->virtual_filesystem = std::move(vfs);
 }
@@ -459,6 +493,27 @@ const Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
     return *impl->software_keyboard;
 }
 
+void System::SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider) {
+    impl->content_provider = std::move(provider);
+}
+
+FileSys::ContentProvider& System::GetContentProvider() {
+    return *impl->content_provider;
+}
+
+const FileSys::ContentProvider& System::GetContentProvider() const {
+    return *impl->content_provider;
+}
+
+void System::RegisterContentProvider(FileSys::ContentProviderUnionSlot slot,
+                                     FileSys::ContentProvider* provider) {
+    impl->content_provider->SetSlot(slot, provider);
+}
+
+void System::ClearContentProvider(FileSys::ContentProviderUnionSlot slot) {
+    impl->content_provider->ClearSlot(slot);
+}
+
 void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) {
     impl->web_browser = std::move(applet);
 }
diff --git a/src/core/core.h b/src/core/core.h
index 511a5ad3a..82b2e087e 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -20,6 +20,10 @@ class WebBrowserApplet;
 } // namespace Core::Frontend
 
 namespace FileSys {
+class CheatList;
+class ContentProvider;
+class ContentProviderUnion;
+enum class ContentProviderUnionSlot;
 class VfsFilesystem;
 } // namespace FileSys
 
@@ -47,6 +51,10 @@ namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Interface;
@@ -205,6 +213,12 @@ public:
     /// Provides a constant pointer to the current process.
     const Kernel::Process* CurrentProcess() const;
 
+    /// Provides a reference to the core timing instance.
+    Timing::CoreTiming& CoreTiming();
+
+    /// Provides a constant reference to the core timing instance.
+    const Timing::CoreTiming& CoreTiming() const;
+
     /// Provides a reference to the kernel instance.
     Kernel::KernelCore& Kernel();
 
@@ -243,6 +257,9 @@ public:
 
     std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
 
+    void RegisterCheatList(const std::vector<FileSys::CheatList>& list, const std::string& build_id,
+                           VAddr code_region_start, VAddr code_region_end);
+
     void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
 
     const Frontend::ProfileSelectApplet& GetProfileSelector() const;
@@ -256,6 +273,17 @@ public:
     Frontend::WebBrowserApplet& GetWebBrowser();
     const Frontend::WebBrowserApplet& GetWebBrowser() const;
 
+    void SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider);
+
+    FileSys::ContentProvider& GetContentProvider();
+
+    const FileSys::ContentProvider& GetContentProvider() const;
+
+    void RegisterContentProvider(FileSys::ContentProviderUnionSlot slot,
+                                 FileSys::ContentProvider* provider);
+
+    void ClearContentProvider(FileSys::ContentProviderUnionSlot slot);
+
 private:
     System();
 
@@ -283,10 +311,6 @@ inline ARM_Interface& CurrentArmInterface() {
     return System::GetInstance().CurrentArmInterface();
 }
 
-inline TelemetrySession& Telemetry() {
-    return System::GetInstance().TelemetrySession();
-}
-
 inline Kernel::Process* CurrentProcess() {
     return System::GetInstance().CurrentProcess();
 }
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index fffda8a99..ba63c3e61 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
 #endif
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/scheduler.h"
@@ -21,7 +22,7 @@
 namespace Core {
 
 void CpuBarrier::NotifyEnd() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     end = true;
     condition.notify_all();
 }
@@ -33,7 +34,7 @@ bool CpuBarrier::Rendezvous() {
     }
 
     if (!end) {
-        std::unique_lock<std::mutex> lock(mutex);
+        std::unique_lock lock{mutex};
 
         --cores_waiting;
         if (!cores_waiting) {
@@ -49,20 +50,21 @@ bool CpuBarrier::Rendezvous() {
     return false;
 }
 
-Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index)
-    : cpu_barrier{cpu_barrier}, core_index{core_index} {
+Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+         std::size_t core_index)
+    : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
+        arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
 #else
-        arm_interface = std::make_unique<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>(system);
         LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
     } else {
-        arm_interface = std::make_unique<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>(system);
     }
 
-    scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
+    scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
 }
 
 Cpu::~Cpu() = default;
@@ -93,14 +95,14 @@ void Cpu::RunLoop(bool tight_loop) {
 
         if (IsMainCore()) {
             // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
-            CoreTiming::Idle();
-            CoreTiming::Advance();
+            core_timing.Idle();
+            core_timing.Advance();
         }
 
         PrepareReschedule();
     } else {
         if (IsMainCore()) {
-            CoreTiming::Advance();
+            core_timing.Advance();
         }
 
         if (tight_loop) {
@@ -129,7 +131,7 @@ void Cpu::Reschedule() {
 
     reschedule_pending = false;
     // Lock the global kernel mutex when we manipulate the HLE state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     scheduler->Reschedule();
 }
 
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 1d2bdc6cd..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -16,6 +16,14 @@ class Scheduler;
 }
 
 namespace Core {
+class System;
+}
+
+namespace Core::Timing {
+class CoreTiming;
+}
+
+namespace Core {
 
 class ARM_Interface;
 class ExclusiveMonitor;
@@ -41,7 +49,8 @@ private:
 
 class Cpu {
 public:
-    Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index);
+    Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+        std::size_t core_index);
     ~Cpu();
 
     void RunLoop(bool tight_loop = true);
@@ -82,6 +91,7 @@ private:
     std::unique_ptr<ARM_Interface> arm_interface;
     CpuBarrier& cpu_barrier;
     std::unique_ptr<Kernel::Scheduler> scheduler;
+    Timing::CoreTiming& core_timing;
 
     std::atomic<bool> reschedule_pending = false;
     std::size_t core_index;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 7953c8720..41adb2302 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -8,149 +8,98 @@
 #include <mutex>
 #include <string>
 #include <tuple>
-#include <unordered_map>
-#include <vector>
+
 #include "common/assert.h"
 #include "common/thread.h"
-#include "common/threadsafe_queue.h"
 #include "core/core_timing_util.h"
 
-namespace CoreTiming {
-
-static s64 global_timer;
-static int slice_length;
-static int downcount;
+namespace Core::Timing {
 
-struct EventType {
-    TimedCallback callback;
-    const std::string* name;
-};
+constexpr int MAX_SLICE_LENGTH = 20000;
 
-struct Event {
+struct CoreTiming::Event {
     s64 time;
     u64 fifo_order;
     u64 userdata;
     const EventType* type;
-};
-
-// Sort by time, unless the times are the same, in which case sort by the order added to the queue
-static bool operator>(const Event& left, const Event& right) {
-    return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
-}
-
-static bool operator<(const Event& left, const Event& right) {
-    return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
-}
-
-// unordered_map stores each element separately as a linked list node so pointers to elements
-// remain stable regardless of rehashes/resizing.
-static std::unordered_map<std::string, EventType> event_types;
 
-// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
-// We don't use std::priority_queue because we need to be able to serialize, unserialize and
-// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated
-// by the standard adaptor class.
-static std::vector<Event> event_queue;
-static u64 event_fifo_id;
-// the queue for storing the events from other threads threadsafe until they will be added
-// to the event_queue by the emu thread
-static Common::MPSCQueue<Event, false> ts_queue;
-
-// the queue for unscheduling the events from other threads threadsafe
-static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
-
-constexpr int MAX_SLICE_LENGTH = 20000;
-
-static s64 idled_cycles;
-
-// Are we in a function that has been called from Advance()
-// If events are sheduled from a function that gets called from Advance(),
-// don't change slice_length and downcount.
-static bool is_global_timer_sane;
-
-static EventType* ev_lost = nullptr;
-
-static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
-
-EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
-    // check for existing type with same name.
-    // we want event type names to remain unique so that we can use them for serialization.
-    ASSERT_MSG(event_types.find(name) == event_types.end(),
-               "CoreTiming Event \"{}\" is already registered. Events should only be registered "
-               "during Init to avoid breaking save states.",
-               name.c_str());
+    // Sort by time, unless the times are the same, in which case sort by
+    // the order added to the queue
+    friend bool operator>(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
+    }
 
-    auto info = event_types.emplace(name, EventType{callback, nullptr});
-    EventType* event_type = &info.first->second;
-    event_type->name = &info.first->first;
-    return event_type;
-}
+    friend bool operator<(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
+    }
+};
 
-void UnregisterAllEvents() {
-    ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
-    event_types.clear();
-}
+CoreTiming::CoreTiming() = default;
+CoreTiming::~CoreTiming() = default;
 
-void Init() {
+void CoreTiming::Initialize() {
     downcount = MAX_SLICE_LENGTH;
     slice_length = MAX_SLICE_LENGTH;
     global_timer = 0;
     idled_cycles = 0;
 
-    // The time between CoreTiming being intialized and the first call to Advance() is considered
+    // The time between CoreTiming being initialized and the first call to Advance() is considered
     // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
     // executing the first cycle of each slice to prepare the slice length and downcount for
     // that slice.
     is_global_timer_sane = true;
 
     event_fifo_id = 0;
-    ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
+
+    const auto empty_timed_callback = [](u64, s64) {};
+    ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
 }
 
-void Shutdown() {
+void CoreTiming::Shutdown() {
     MoveEvents();
     ClearPendingEvents();
     UnregisterAllEvents();
 }
 
-// This should only be called from the CPU thread. If you are calling
-// it from any other thread, you are doing something evil
-u64 GetTicks() {
-    u64 ticks = static_cast<u64>(global_timer);
-    if (!is_global_timer_sane) {
-        ticks += slice_length - downcount;
-    }
-    return ticks;
-}
-
-void AddTicks(u64 ticks) {
-    downcount -= static_cast<int>(ticks);
-}
+EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
+    // Check for an existing type with the same name.
+    // We want event type names to remain unique so that we can use them for serialization.
+    ASSERT_MSG(event_types.find(name) == event_types.end(),
+               "CoreTiming Event \"{}\" is already registered. Events should only be registered "
+               "during Init to avoid breaking save states.",
+               name.c_str());
 
-u64 GetIdleTicks() {
-    return static_cast<u64>(idled_cycles);
+    auto info = event_types.emplace(name, EventType{callback, nullptr});
+    EventType* event_type = &info.first->second;
+    event_type->name = &info.first->first;
+    return event_type;
 }
 
-void ClearPendingEvents() {
-    event_queue.clear();
+void CoreTiming::UnregisterAllEvents() {
+    ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
+    event_types.clear();
 }
 
-void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
     ASSERT(event_type != nullptr);
-    s64 timeout = GetTicks() + cycles_into_future;
+    const s64 timeout = GetTicks() + cycles_into_future;
+
     // If this event needs to be scheduled before the next advance(), force one early
-    if (!is_global_timer_sane)
+    if (!is_global_timer_sane) {
         ForceExceptionCheck(cycles_into_future);
+    }
+
     event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
     std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
 }
 
-void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
+                                         u64 userdata) {
     ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
 }
 
-void UnscheduleEvent(const EventType* event_type, u64 userdata) {
-    auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type == event_type && e.userdata == userdata;
     });
 
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
     }
 }
 
-void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
+void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
     unschedule_queue.Push(std::make_pair(event_type, userdata));
 }
 
-void RemoveEvent(const EventType* event_type) {
-    auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
-                              [&](const Event& e) { return e.type == event_type; });
+u64 CoreTiming::GetTicks() const {
+    u64 ticks = static_cast<u64>(global_timer);
+    if (!is_global_timer_sane) {
+        ticks += slice_length - downcount;
+    }
+    return ticks;
+}
+
+u64 CoreTiming::GetIdleTicks() const {
+    return static_cast<u64>(idled_cycles);
+}
+
+void CoreTiming::AddTicks(u64 ticks) {
+    downcount -= static_cast<int>(ticks);
+}
+
+void CoreTiming::ClearPendingEvents() {
+    event_queue.clear();
+}
+
+void CoreTiming::RemoveEvent(const EventType* event_type) {
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
+                                    [&](const Event& e) { return e.type == event_type; });
 
     // Removing random items breaks the invariant so we have to re-establish it.
     if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
     }
 }
 
-void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
+void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
     MoveEvents();
     RemoveEvent(event_type);
 }
 
-void ForceExceptionCheck(s64 cycles) {
+void CoreTiming::ForceExceptionCheck(s64 cycles) {
     cycles = std::max<s64>(0, cycles);
-    if (downcount > cycles) {
-        // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-        // here. Account for cycles already executed by adjusting the g.slice_length
-        slice_length -= downcount - static_cast<int>(cycles);
-        downcount = static_cast<int>(cycles);
+    if (downcount <= cycles) {
+        return;
     }
+
+    // downcount is always (much) smaller than INT_MAX so we can safely cast cycles to an int
+    // here. Account for cycles already executed by adjusting slice_length.
+    slice_length -= downcount - static_cast<int>(cycles);
+    downcount = static_cast<int>(cycles);
 }
 
-void MoveEvents() {
+void CoreTiming::MoveEvents() {
     for (Event ev; ts_queue.Pop(ev);) {
         ev.fifo_order = event_fifo_id++;
         event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
     }
 }
 
-void Advance() {
+void CoreTiming::Advance() {
     MoveEvents();
     for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
         UnscheduleEvent(ev.first, ev.second);
     }
 
-    int cycles_executed = slice_length - downcount;
+    const int cycles_executed = slice_length - downcount;
     global_timer += cycles_executed;
     slice_length = MAX_SLICE_LENGTH;
 
@@ -215,7 +186,7 @@ void Advance() {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        evt.type->callback(evt.userdata, static_cast<int>(global_timer - evt.time));
+        evt.type->callback(evt.userdata, global_timer - evt.time);
     }
 
     is_global_timer_sane = false;
@@ -229,17 +200,17 @@ void Advance() {
     downcount = slice_length;
 }
 
-void Idle() {
+void CoreTiming::Idle() {
     idled_cycles += downcount;
     downcount = 0;
 }
 
-std::chrono::microseconds GetGlobalTimeUs() {
+std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
     return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
 }
 
-int GetDowncount() {
+int CoreTiming::GetDowncount() const {
     return downcount;
 }
 
-} // namespace CoreTiming
+} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 9ed757bd7..9d2efde37 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -4,6 +4,27 @@
 
 #pragma once
 
+#include <chrono>
+#include <functional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "common/common_types.h"
+#include "common/threadsafe_queue.h"
+
+namespace Core::Timing {
+
+/// A callback that may be scheduled for a particular core timing event.
+using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
+
+/// Contains the characteristics of a particular event.
+struct EventType {
+    /// The event's callback function.
+    TimedCallback callback;
+    /// A pointer to the name of the event.
+    const std::string* name;
+};
+
 /**
  * This is a system to schedule events into the emulated machine's future. Time is measured
  * in main CPU clock cycles.
@@ -16,80 +37,120 @@
  * inside callback:
  *   ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
  */
-
-#include <chrono>
-#include <functional>
-#include <string>
-#include "common/common_types.h"
-
-namespace CoreTiming {
-
-struct EventType;
-
-using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
-
-/**
- * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
- * required to end slice -1 and start slice 0 before the first cycle of code is executed.
- */
-void Init();
-void Shutdown();
-
-/**
- * This should only be called from the emu thread, if you are calling it any other thread, you are
- * doing something evil
- */
-u64 GetTicks();
-u64 GetIdleTicks();
-void AddTicks(u64 ticks);
-
-/**
- * Returns the event_type identifier. if name is not unique, it will assert.
- */
-EventType* RegisterEvent(const std::string& name, TimedCallback callback);
-void UnregisterAllEvents();
-
-/**
- * After the first Advance, the slice lengths and the downcount will be reduced whenever an event
- * is scheduled earlier than the current values.
- * Scheduling from a callback will not update the downcount until the Advance() completes.
- */
-void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
-
-/**
- * This is to be called when outside of hle threads, such as the graphics thread, wants to
- * schedule things to be executed on the main thread.
- * Not that this doesn't change slice_length and thus events scheduled by this might be called
- * with a delay of up to MAX_SLICE_LENGTH
- */
-void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
-
-void UnscheduleEvent(const EventType* event_type, u64 userdata);
-void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
-
-/// We only permit one event of each type in the queue at a time.
-void RemoveEvent(const EventType* event_type);
-void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
-
-/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
- * the previous timing slice and begins the next one, you must Advance from the previous
- * slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
- * Advance() is required to initialize the slice length before the first cycle of emulated
- * instructions is executed.
- */
-void Advance();
-void MoveEvents();
-
-/// Pretend that the main CPU has executed enough cycles to reach the next event.
-void Idle();
-
-/// Clear all pending events. This should ONLY be done on exit.
-void ClearPendingEvents();
-
-void ForceExceptionCheck(s64 cycles);
-
-std::chrono::microseconds GetGlobalTimeUs();
-
-int GetDowncount();
-
-} // namespace CoreTiming
+class CoreTiming {
+public:
+    CoreTiming();
+    ~CoreTiming();
+
+    CoreTiming(const CoreTiming&) = delete;
+    CoreTiming(CoreTiming&&) = delete;
+
+    CoreTiming& operator=(const CoreTiming&) = delete;
+    CoreTiming& operator=(CoreTiming&&) = delete;
+
+    /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
+    /// required to end slice -1 and start slice 0 before the first cycle of code is executed.
+    void Initialize();
+
+    /// Tears down all timing related functionality.
+    void Shutdown();
+
+    /// Registers a core timing event with the given name and callback.
+    ///
+    /// @param name     The name of the core timing event to register.
+    /// @param callback The callback to execute for the event.
+    ///
+    /// @returns An EventType instance representing the registered event.
+    ///
+    /// @pre The name of the event being registered must be unique among all
+    ///      registered events.
+    ///
+    EventType* RegisterEvent(const std::string& name, TimedCallback callback);
+
+    /// Unregisters all registered events thus far.
+    void UnregisterAllEvents();
+
+    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
+    /// event is scheduled earlier than the current values.
+    ///
+    /// Scheduling from a callback will not update the downcount until the Advance() completes.
+    void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
+
+    /// This is to be called when outside of hle threads, such as the graphics thread, wants to
+    /// schedule things to be executed on the main thread.
+    ///
+    /// @note This doesn't change slice_length and thus events scheduled by this might be
+    /// called with a delay of up to MAX_SLICE_LENGTH
+    void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
+                                 u64 userdata = 0);
+
+    void UnscheduleEvent(const EventType* event_type, u64 userdata);
+    void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
+
+    /// We only permit one event of each type in the queue at a time.
+    void RemoveEvent(const EventType* event_type);
+    void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
+
+    void ForceExceptionCheck(s64 cycles);
+
+    /// This should only be called from the emu thread; if you are calling it from any other
+    /// thread, you are doing something evil
+    u64 GetTicks() const;
+
+    u64 GetIdleTicks() const;
+
+    void AddTicks(u64 ticks);
+
+    /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
+    /// the previous timing slice and begins the next one, you must Advance from the previous
+    /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
+    /// Advance() is required to initialize the slice length before the first cycle of emulated
+    /// instructions is executed.
+    void Advance();
+
+    /// Pretend that the main CPU has executed enough cycles to reach the next event.
+    void Idle();
+
+    std::chrono::microseconds GetGlobalTimeUs() const;
+
+    int GetDowncount() const;
+
+private:
+    struct Event;
+
+    /// Clear all pending events. This should ONLY be done on exit.
+    void ClearPendingEvents();
+    void MoveEvents();
+
+    s64 global_timer = 0;
+    s64 idled_cycles = 0;
+    int slice_length = 0;
+    int downcount = 0;
+
+    // Are we in a function that has been called from Advance()
+    // If events are scheduled from a function that gets called from Advance(),
+    // don't change slice_length and downcount.
+    bool is_global_timer_sane = false;
+
+    // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
+    // We don't use std::priority_queue because we need to be able to serialize, unserialize and
+    // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
+    // accommodated by the standard adaptor class.
+    std::vector<Event> event_queue;
+    u64 event_fifo_id = 0;
+
+    // Stores each element separately as a linked list node so pointers to elements
+    // remain stable regardless of rehashes/resizing.
+    std::unordered_map<std::string, EventType> event_types;
+
+    // The queue for storing the events from other threads threadsafe until they will be added
+    // to the event_queue by the emu thread
+    Common::MPSCQueue<Event> ts_queue;
+
+    // The queue for unscheduling the events from other threads threadsafe
+    Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
+
+    EventType* ev_lost = nullptr;
+};
+
+} // namespace Core::Timing
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 73dea4edb..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,8 +7,9 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
 
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
     return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }
 
-} // namespace CoreTiming
+u64 CpuCyclesToClockCycles(u64 ticks) {
+    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+}
+
+} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 5c3718782..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,11 +6,12 @@
 
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
 
 inline s64 msToCycles(int ms) {
     // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
     return cycles * 1000 / BASE_CLOCK_RATE;
 }
 
-} // namespace CoreTiming
+u64 CpuCyclesToClockCycles(u64 ticks);
+
+} // namespace Core::Timing
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 769a6fefa..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,7 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
     exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
 
     for (std::size_t index = 0; index < cores.size(); ++index) {
-        cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
+        cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
     }
 
     // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index ca12fb4ab..dc006e2bb 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -22,6 +22,7 @@
 #include "common/file_util.h"
 #include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/crypto/aes_util.h"
 #include "core/crypto/key_manager.h"
 #include "core/crypto/partition_data_manager.h"
@@ -398,7 +399,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
 }
 
 void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
-    std::ifstream file(filename);
+    std::ifstream file;
+    OpenFStream(file, filename, std::ios_base::in);
     if (!file.is_open())
         return;
 
@@ -793,7 +795,7 @@ void KeyManager::DeriveBase() {
 
 void KeyManager::DeriveETicket(PartitionDataManager& data) {
     // ETicket keys
-    const auto es = Service::FileSystem::GetUnionContents().GetEntry(
+    const auto es = Core::System::GetInstance().GetContentProvider().GetEntry(
         0x0100000000000033, FileSys::ContentRecordType::Program);
 
     if (es == nullptr)
diff --git a/src/core/file_sys/cheat_engine.cpp b/src/core/file_sys/cheat_engine.cpp
new file mode 100644
index 000000000..b06c2f20a
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.cpp
@@ -0,0 +1,492 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <locale>
+#include "common/hex_util.h"
+#include "common/microprofile.h"
+#include "common/swap.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
+#include "core/file_sys/cheat_engine.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/service/hid/controllers/npad.h"
+#include "core/hle/service/hid/hid.h"
+#include "core/hle/service/sm/sm.h"
+
+namespace FileSys {
+
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
+constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
+
+u64 Cheat::Address() const {
+    u64 out;
+    std::memcpy(&out, raw.data(), sizeof(u64));
+    return Common::swap64(out) & 0xFFFFFFFFFF;
+}
+
+u64 Cheat::ValueWidth(u64 offset) const {
+    return Value(offset, width);
+}
+
+u64 Cheat::Value(u64 offset, u64 width) const {
+    u64 out;
+    std::memcpy(&out, raw.data() + offset, sizeof(u64));
+    out = Common::swap64(out);
+    if (width == 8)
+        return out;
+    return out & ((1ull << (width * CHAR_BIT)) - 1);
+}
+
+u32 Cheat::KeypadValue() const {
+    u32 out;
+    std::memcpy(&out, raw.data(), sizeof(u32));
+    return Common::swap32(out) & 0x0FFFFFFF;
+}
+
+void CheatList::SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end,
+                                    VAddr heap_end, MemoryWriter writer, MemoryReader reader) {
+    this->main_region_begin = main_begin;
+    this->main_region_end = main_end;
+    this->heap_region_begin = heap_begin;
+    this->heap_region_end = heap_end;
+    this->writer = writer;
+    this->reader = reader;
+}
+
+MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
+
+void CheatList::Execute() {
+    MICROPROFILE_SCOPE(Cheat_Engine);
+
+    std::fill(scratch.begin(), scratch.end(), 0);
+    in_standard = false;
+    for (std::size_t i = 0; i < master_list.size(); ++i) {
+        LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", i, master_list[i].first);
+        current_block = i;
+        ExecuteBlock(master_list[i].second);
+    }
+
+    in_standard = true;
+    for (std::size_t i = 0; i < standard_list.size(); ++i) {
+        LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", i, standard_list[i].first);
+        current_block = i;
+        ExecuteBlock(standard_list[i].second);
+    }
+}
+
+CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
+    : master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {}
+
+bool CheatList::EvaluateConditional(const Cheat& cheat) const {
+    using ComparisonFunction = bool (*)(u64, u64);
+    constexpr std::array<ComparisonFunction, 6> comparison_functions{
+        [](u64 a, u64 b) { return a > b; },  [](u64 a, u64 b) { return a >= b; },
+        [](u64 a, u64 b) { return a < b; },  [](u64 a, u64 b) { return a <= b; },
+        [](u64 a, u64 b) { return a == b; }, [](u64 a, u64 b) { return a != b; },
+    };
+
+    if (cheat.type == CodeType::ConditionalInput) {
+        const auto applet_resource =
+            system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
+        if (applet_resource == nullptr) {
+            LOG_WARNING(
+                Common_Filesystem,
+                "Attempted to evaluate input conditional, but applet resource is not initialized!");
+            return false;
+        }
+
+        const auto press_state =
+            applet_resource
+                ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)
+                .GetAndResetPressState();
+        return ((press_state & cheat.KeypadValue()) & KEYPAD_BITMASK) != 0;
+    }
+
+    ASSERT(cheat.type == CodeType::Conditional);
+
+    const auto offset =
+        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
+    ASSERT(static_cast<u8>(cheat.comparison_op.Value()) < 6);
+    auto* function = comparison_functions[static_cast<u8>(cheat.comparison_op.Value())];
+    const auto addr = cheat.Address() + offset;
+
+    return function(reader(cheat.width, SanitizeAddress(addr)), cheat.ValueWidth(8));
+}
+
+void CheatList::ProcessBlockPairs(const Block& block) {
+    block_pairs.clear();
+
+    u64 scope = 0;
+    std::map<u64, u64> pairs;
+
+    for (std::size_t i = 0; i < block.size(); ++i) {
+        const auto& cheat = block[i];
+
+        switch (cheat.type) {
+        case CodeType::Conditional:
+        case CodeType::ConditionalInput:
+            pairs.insert_or_assign(scope, i);
+            ++scope;
+            break;
+        case CodeType::EndConditional: {
+            --scope;
+            const auto idx = pairs.at(scope);
+            block_pairs.insert_or_assign(idx, i);
+            break;
+        }
+        case CodeType::Loop: {
+            if (cheat.end_of_loop) {
+                --scope;
+                const auto idx = pairs.at(scope);
+                block_pairs.insert_or_assign(idx, i);
+            } else {
+                pairs.insert_or_assign(scope, i);
+                ++scope;
+            }
+            break;
+        }
+        }
+    }
+}
+
+void CheatList::WriteImmediate(const Cheat& cheat) {
+    const auto offset =
+        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
+    const auto& register_3 = scratch.at(cheat.register_3);
+
+    const auto addr = cheat.Address() + offset + register_3;
+    LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
+              cheat.Value(8, cheat.width), addr); // argument order matches the format string
+    writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(8));
+}
+
+void CheatList::BeginConditional(const Cheat& cheat) {
+    if (EvaluateConditional(cheat)) {
+        return;
+    }
+
+    const auto iter = block_pairs.find(current_index);
+    ASSERT(iter != block_pairs.end());
+    current_index = iter->second - 1;
+}
+
+void CheatList::EndConditional(const Cheat& cheat) {
+    LOG_DEBUG(Common_Filesystem, "Ending conditional block.");
+}
+
+void CheatList::Loop(const Cheat& cheat) {
+    if (cheat.end_of_loop.Value())
+        ASSERT(!cheat.end_of_loop.Value());
+
+    auto& register_3 = scratch.at(cheat.register_3);
+    const auto iter = block_pairs.find(current_index);
+    ASSERT(iter != block_pairs.end());
+    ASSERT(iter->first < iter->second);
+
+    const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
+    for (s32 i = initial_value; i >= 0; --i) {
+        register_3 = static_cast<u64>(i);
+        for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
+            current_index = c;
+            ExecuteSingleCheat(
+                (in_standard ? standard_list : master_list)[current_block].second[c]);
+        }
+    }
+
+    current_index = iter->second;
+}
+
+void CheatList::LoadImmediate(const Cheat& cheat) {
+    auto& register_3 = scratch.at(cheat.register_3);
+
+    LOG_DEBUG(Common_Filesystem, "setting register={:01X} equal to value={:016X}", cheat.register_3,
+              cheat.Value(4, 8));
+    register_3 = cheat.Value(4, 8);
+}
+
+void CheatList::LoadIndexed(const Cheat& cheat) {
+    const auto offset =
+        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
+    auto& register_3 = scratch.at(cheat.register_3);
+
+    const auto addr = (cheat.load_from_register.Value() ? register_3 : offset) + cheat.Address();
+    LOG_DEBUG(Common_Filesystem, "writing indexed value to register={:01X}, addr={:016X}",
+              cheat.register_3, addr);
+    register_3 = reader(cheat.width, SanitizeAddress(addr));
+}
+
+void CheatList::StoreIndexed(const Cheat& cheat) {
+    const auto& register_3 = scratch.at(cheat.register_3);
+
+    const auto addr =
+        register_3 + (cheat.add_additional_register.Value() ? scratch.at(cheat.register_6) : 0);
+    LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
+              cheat.Value(4, cheat.width), addr);
+    writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(4));
+}
+
+void CheatList::RegisterArithmetic(const Cheat& cheat) {
+    using ArithmeticFunction = u64 (*)(u64, u64);
+    constexpr std::array<ArithmeticFunction, 5> arithmetic_functions{
+        [](u64 a, u64 b) { return a + b; },  [](u64 a, u64 b) { return a - b; },
+        [](u64 a, u64 b) { return a * b; },  [](u64 a, u64 b) { return a << b; },
+        [](u64 a, u64 b) { return a >> b; },
+    };
+
+    using ArithmeticOverflowCheck = bool (*)(u64, u64);
+    constexpr std::array<ArithmeticOverflowCheck, 5> arithmetic_overflow_checks{
+        [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() - b); },           // a + b
+        [](u64 a, u64 b) { return a < b; },                                               // a - b
+        [](u64 a, u64 b) { return b != 0 && a > (std::numeric_limits<u64>::max() / b); }, // a * b
+        [](u64 a, u64 b) { return b >= 64 || (b != 0 && (a >> (64 - b)) != 0); },         // a << b
+        [](u64 a, u64 b) { return b >= 64 || (a & ((1ull << b) - 1)) != 0; },             // a >> b
+    };
+
+    static_assert(sizeof(arithmetic_functions) == sizeof(arithmetic_overflow_checks),
+                  "Missing or have extra arithmetic overflow checks compared to functions!");
+
+    auto& register_3 = scratch.at(cheat.register_3);
+
+    ASSERT(static_cast<u8>(cheat.arithmetic_op.Value()) < 5);
+    auto* function = arithmetic_functions[static_cast<u8>(cheat.arithmetic_op.Value())];
+    auto* overflow_function =
+        arithmetic_overflow_checks[static_cast<u8>(cheat.arithmetic_op.Value())];
+    LOG_DEBUG(Common_Filesystem, "performing arithmetic with register={:01X}, value={:016X}",
+              cheat.register_3, cheat.ValueWidth(4));
+
+    if (overflow_function(register_3, cheat.ValueWidth(4))) {
+        LOG_WARNING(Common_Filesystem,
+                    "overflow will occur when performing arithmetic operation={:02X} with operands "
+                    "a={:016X}, b={:016X}!",
+                    static_cast<u8>(cheat.arithmetic_op.Value()), register_3, cheat.ValueWidth(4));
+    }
+
+    register_3 = function(register_3, cheat.ValueWidth(4));
+}
+
+void CheatList::BeginConditionalInput(const Cheat& cheat) {
+    if (EvaluateConditional(cheat))
+        return;
+
+    const auto iter = block_pairs.find(current_index);
+    ASSERT(iter != block_pairs.end());
+    current_index = iter->second - 1;
+}
+
+VAddr CheatList::SanitizeAddress(VAddr in) const {
+    if ((in < main_region_begin || in >= main_region_end) &&
+        (in < heap_region_begin || in >= heap_region_end)) {
+        LOG_ERROR(Common_Filesystem,
+                  "Cheat attempting to access memory at invalid address={:016X}, if this persists, "
+                  "the cheat may be incorrect. However, this may be normal early in execution if "
+                  "the game has not properly set up yet.",
+                  in);
+        return 0; ///< Invalid addresses will hard crash
+    }
+
+    return in;
+}
+
+void CheatList::ExecuteSingleCheat(const Cheat& cheat) {
+    using CheatOperationFunction = void (CheatList::*)(const Cheat&);
+    constexpr std::array<CheatOperationFunction, 9> cheat_operation_functions{
+        &CheatList::WriteImmediate,        &CheatList::BeginConditional,
+        &CheatList::EndConditional,        &CheatList::Loop,
+        &CheatList::LoadImmediate,         &CheatList::LoadIndexed,
+        &CheatList::StoreIndexed,          &CheatList::RegisterArithmetic,
+        &CheatList::BeginConditionalInput,
+    };
+
+    const auto index = static_cast<u8>(cheat.type.Value());
+    ASSERT(index < cheat_operation_functions.size()); // element count, not sizeof() in bytes
+    const auto op = cheat_operation_functions[index];
+    (this->*op)(cheat);
+}
+
+void CheatList::ExecuteBlock(const Block& block) {
+    encountered_loops.clear();
+
+    ProcessBlockPairs(block);
+    for (std::size_t i = 0; i < block.size(); ++i) {
+        current_index = i;
+        ExecuteSingleCheat(block[i]);
+        i = current_index;
+    }
+}
+
+CheatParser::~CheatParser() = default;
+
+CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
+                                     CheatList::ProgramSegment standard) const {
+    return {system, std::move(master), std::move(standard)};
+}
+
+TextCheatParser::~TextCheatParser() = default;
+
+CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
+    std::stringstream ss;
+    ss.write(reinterpret_cast<const char*>(data.data()), data.size());
+
+    std::vector<std::string> lines;
+    std::string stream_line;
+    while (std::getline(ss, stream_line)) {
+        // Remove a trailing \r
+        if (!stream_line.empty() && stream_line.back() == '\r')
+            stream_line.pop_back();
+        lines.push_back(std::move(stream_line));
+    }
+
+    CheatList::ProgramSegment master_list;
+    CheatList::ProgramSegment standard_list;
+
+    for (std::size_t i = 0; i < lines.size(); ++i) {
+        auto line = lines[i];
+
+        if (!line.empty() && (line[0] == '[' || line[0] == '{')) {
+            const auto master = line[0] == '{';
+            const auto begin = master ? line.find('{') : line.find('[');
+            const auto end = master ? line.rfind('}') : line.rfind(']');
+
+            ASSERT(begin != std::string::npos && end != std::string::npos);
+
+            const std::string patch_name{line.begin() + begin + 1, line.begin() + end};
+            CheatList::Block block{};
+
+            while (i < lines.size() - 1) {
+                line = lines[++i];
+                if (!line.empty() && (line[0] == '[' || line[0] == '{')) {
+                    --i;
+                    break;
+                }
+
+                if (line.size() < 8)
+                    continue;
+
+                Cheat out{};
+                out.raw = ParseSingleLineCheat(line);
+                block.push_back(out);
+            }
+
+            // block is not used again, so hand its storage over instead of copying it.
+            (master ? master_list : standard_list).emplace_back(patch_name, std::move(block));
+        }
+    }
+
+    return MakeCheatList(system, std::move(master_list), std::move(standard_list));
+}
+
+std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
+    std::array<u8, 16> out{};
+
+    if (line.size() < 8)
+        return out;
+
+    const auto word1 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data(), 8});
+    std::memcpy(out.data(), word1.data(), sizeof(u32));
+
+    if (line.size() < 17 || line[8] != ' ')
+        return out;
+
+    const auto word2 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 9, 8});
+    std::memcpy(out.data() + sizeof(u32), word2.data(), sizeof(u32));
+
+    if (line.size() < 26 || line[17] != ' ') {
+        // Perform shifting in case value is truncated early.
+        const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
+        if (type == CodeType::Loop || type == CodeType::LoadImmediate ||
+            type == CodeType::StoreIndexed || type == CodeType::RegisterArithmetic) {
+            std::memcpy(out.data() + 8, out.data() + 4, sizeof(u32));
+            std::memset(out.data() + 4, 0, sizeof(u32));
+        }
+
+        return out;
+    }
+
+    const auto word3 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 18, 8});
+    std::memcpy(out.data() + 2 * sizeof(u32), word3.data(), sizeof(u32));
+
+    if (line.size() < 35 || line[26] != ' ') {
+        // Perform shifting in case value is truncated early.
+        const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
+        if (type == CodeType::WriteImmediate || type == CodeType::Conditional) {
+            std::memcpy(out.data() + 12, out.data() + 8, sizeof(u32));
+            std::memset(out.data() + 8, 0, sizeof(u32));
+        }
+
+        return out;
+    }
+
+    const auto word4 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 27, 8});
+    std::memcpy(out.data() + 3 * sizeof(u32), word4.data(), sizeof(u32));
+
+    return out;
+}
+
+namespace {
+u64 MemoryReadImpl(u32 width, VAddr addr) {
+    switch (width) {
+    case 1:
+        return Memory::Read8(addr);
+    case 2:
+        return Memory::Read16(addr);
+    case 4:
+        return Memory::Read32(addr);
+    case 8:
+        return Memory::Read64(addr);
+    default:
+        UNREACHABLE();
+        return 0;
+    }
+}
+
+void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
+    switch (width) {
+    case 1:
+        Memory::Write8(addr, static_cast<u8>(value));
+        break;
+    case 2:
+        Memory::Write16(addr, static_cast<u16>(value));
+        break;
+    case 4:
+        Memory::Write32(addr, static_cast<u32>(value));
+        break;
+    case 8:
+        Memory::Write64(addr, value);
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+} // Anonymous namespace
+
+CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
+                         const std::string& build_id, VAddr code_region_start,
+                         VAddr code_region_end)
+    : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
+    event = core_timing.RegisterEvent(
+        "CheatEngine::FrameCallback::" + build_id,
+        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
+    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
+
+    const auto& vm_manager = system.CurrentProcess()->VMManager();
+    for (auto& list : this->cheats) {
+        list.SetMemoryParameters(code_region_start, vm_manager.GetHeapRegionBaseAddress(),
+                                 code_region_end, vm_manager.GetHeapRegionEndAddress(),
+                                 &MemoryWriteImpl, &MemoryReadImpl);
+    }
+}
+
+CheatEngine::~CheatEngine() {
+    core_timing.UnscheduleEvent(event, 0);
+}
+
+void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
+    for (auto& list : cheats) {
+        list.Execute();
+    }
+
+    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
+}
+
+} // namespace FileSys
diff --git a/src/core/file_sys/cheat_engine.h b/src/core/file_sys/cheat_engine.h
new file mode 100644
index 000000000..ac22a82cb
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.h
@@ -0,0 +1,234 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace Core::Timing {
+class CoreTiming;
+struct EventType;
+} // namespace Core::Timing
+
+namespace FileSys {
+
+enum class CodeType : u32 { // first hex digit of an encoded cheat (see the patterns below)
+    // 0TMR00AA AAAAAAAA YYYYYYYY YYYYYYYY
+    // Writes a T sized value Y to the address A added to the value of register R in memory domain M
+    WriteImmediate = 0,
+
+    // 1TMC00AA AAAAAAAA YYYYYYYY YYYYYYYY
+    // Compares the T sized value Y to the value at address A in memory domain M using the
+    // conditional function C. If success, continues execution. If failure, jumps to the matching
+    // EndConditional statement.
+    Conditional = 1,
+
+    // 20000000
+    // Terminates a Conditional or ConditionalInput block.
+    EndConditional = 2,
+
+    // 300R0000 VVVVVVVV
+    // Starts looping V times, storing the current count in register R.
+    // Loop block is terminated with a matching 310R0000.
+    Loop = 3,
+
+    // 400R0000 VVVVVVVV VVVVVVVV
+    // Sets the value of register R to the value V.
+    LoadImmediate = 4,
+
+    // 5TMRI0AA AAAAAAAA
+    // Sets the value of register R to the value of width T at address A in memory domain M, with
+    // the current value of R added to the address if I == 1.
+    LoadIndexed = 5,
+
+    // 6T0RIFG0 VVVVVVVV VVVVVVVV
+    // Writes the value V of width T to the memory address stored in register R. Adds the value of
+    // register G to the final calculation if F is nonzero. Increments the value of register R by T
+    // after operation if I is nonzero.
+    StoreIndexed = 6,
+
+    // 7T0RA000 VVVVVVVV
+    // Performs the arithmetic operation A on the value in register R and the value V of width T,
+    // storing the result in register R.
+    RegisterArithmetic = 7,
+
+    // 8KKKKKKK
+    // Checks to see if any of the buttons defined by the bitmask K are pressed. If any are,
+    // execution continues. If none are, execution skips to the next EndConditional command.
+    ConditionalInput = 8,
+};
+
+enum class MemoryType : u32 { // the "M" digit in the CodeType patterns
+    // Addressed relative to start of main NSO
+    MainNSO = 0,
+
+    // Addressed relative to start of heap
+    Heap = 1,
+};
+
+enum class ArithmeticOp : u32 { // the "A" digit of a RegisterArithmetic code (7T0RA000)
+    Add = 0,
+    Sub = 1,
+    Mult = 2,
+    LShift = 3,
+    RShift = 4,
+};
+
+enum class ComparisonOp : u32 { // the "C" digit of a Conditional code (1TMC00AA)
+    GreaterThan = 1,
+    GreaterThanEqual = 2,
+    LessThan = 3,
+    LessThanEqual = 4,
+    Equal = 5,
+    Inequal = 6,
+};
+
+union Cheat { // one encoded cheat instruction (16 bytes)
+    std::array<u8, 16> raw;
+    // Bit-field views into the start of `raw`; which ones apply depends on `type` (see CodeType).
+    BitField<4, 4, CodeType> type;
+    BitField<0, 4, u32> width; // Can be 1, 2, 4, or 8. Measured in bytes.
+    BitField<0, 4, u32> end_of_loop;
+    BitField<12, 4, MemoryType> memory_type;
+    BitField<8, 4, u32> register_3;
+    BitField<8, 4, ComparisonOp> comparison_op;
+    BitField<20, 4, u32> load_from_register;
+    BitField<20, 4, u32> increment_register;
+    BitField<20, 4, ArithmeticOp> arithmetic_op;
+    BitField<16, 4, u32> add_additional_register;
+    BitField<28, 4, u32> register_6;
+
+    u64 Address() const;
+    u64 ValueWidth(u64 offset) const;
+    u64 Value(u64 offset, u64 width) const;
+    u32 KeypadValue() const;
+};
+
+class CheatParser;
+
+// Represents a full collection of cheats for a game. The Execute function should be called every
+// interval that all cheats should be executed. Clients should not directly instantiate this class
+// (hence private constructor), they should instead receive an instance from CheatParser, which
+// guarantees the list is always in an acceptable state.
+class CheatList {
+public:
+    friend class CheatParser; // lets CheatParser call the private constructor
+
+    using Block = std::vector<Cheat>;
+    using ProgramSegment = std::vector<std::pair<std::string, Block>>;
+
+    // (width in bytes, address, value)
+    using MemoryWriter = void (*)(u32, VAddr, u64);
+    // (width in bytes, address) -> value
+    using MemoryReader = u64 (*)(u32, VAddr);
+
+    void SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end, VAddr heap_end,
+                             MemoryWriter writer, MemoryReader reader);
+
+    void Execute(); // call once per cheat interval (see class comment)
+
+private:
+    CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);
+
+    void ProcessBlockPairs(const Block& block);
+    void ExecuteSingleCheat(const Cheat& cheat);
+
+    void ExecuteBlock(const Block& block);
+
+    bool EvaluateConditional(const Cheat& cheat) const;
+
+    // Individual cheat operations, one per CodeType
+    void WriteImmediate(const Cheat& cheat);
+    void BeginConditional(const Cheat& cheat);
+    void EndConditional(const Cheat& cheat);
+    void Loop(const Cheat& cheat);
+    void LoadImmediate(const Cheat& cheat);
+    void LoadIndexed(const Cheat& cheat);
+    void StoreIndexed(const Cheat& cheat);
+    void RegisterArithmetic(const Cheat& cheat);
+    void BeginConditionalInput(const Cheat& cheat);
+
+    VAddr SanitizeAddress(VAddr in) const;
+
+    // Master Codes are defined as codes that cannot be disabled and are run prior to all
+    // others.
+    ProgramSegment master_list;
+    // All other codes
+    ProgramSegment standard_list;
+
+    bool in_standard = false; // NOTE(review): presumably true while standard_list runs - confirm
+
+    // 16 (0x0-0xF) scratch registers that can be used by cheats
+    std::array<u64, 16> scratch{};
+
+    MemoryWriter writer = nullptr;
+    MemoryReader reader = nullptr;
+
+    u64 main_region_begin{};
+    u64 heap_region_begin{};
+    u64 main_region_end{};
+    u64 heap_region_end{};
+
+    u64 current_block{};
+    // The current index of the cheat within the current Block
+    u64 current_index{};
+
+    // The 'stack' of the program. When a conditional or loop statement is encountered, its index is
+    // pushed onto this queue. When an end block is encountered, the condition is checked.
+    std::map<u64, u64> block_pairs;
+
+    std::set<u64> encountered_loops;
+
+    const Core::System* system;
+};
+
+// Intermediary class that parses a text file or other disk format for storing cheats into a
+// CheatList object that can be used for execution. Subclasses build the list via MakeCheatList.
+class CheatParser {
+public:
+    virtual ~CheatParser();
+
+    virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;
+
+protected:
+    CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
+                            CheatList::ProgramSegment standard) const;
+};
+
+// CheatParser implementation that parses text files
+class TextCheatParser final : public CheatParser {
+public:
+    ~TextCheatParser() override;
+
+    CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;
+
+private:
+    std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const;
+};
+
+// Owns a set of CheatLists, wires them to emulated memory and runs them via CoreTiming events
+class CheatEngine final {
+public:
+    CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
+                VAddr code_region_start, VAddr code_region_end);
+    ~CheatEngine(); // unschedules the timing event
+
+private:
+    void FrameCallback(u64 userdata, s64 cycles_late); // executes every list, then reschedules
+
+    std::vector<CheatList> cheats;
+
+    Core::Timing::EventType* event; // returned by core_timing.RegisterEvent in the constructor
+    Core::Timing::CoreTiming& core_timing;
+};
+
+} // namespace FileSys
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 5d4d05c82..15b9e6624 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -24,13 +24,26 @@ namespace FileSys {
 
 union NCASectionHeader;
 
+/// Describes the type of content within an NCA archive.
 enum class NCAContentType : u8 {
+    /// Executable-related data
     Program = 0,
+
+    /// Metadata.
     Meta = 1,
+
+    /// Access control data.
     Control = 2,
+
+    /// Information related to the game manual
+    /// e.g. Legal information, etc.
     Manual = 3,
+
+    /// System data.
     Data = 4,
-    Data_Unknown5 = 5, ///< Seems to be used on some system archives
+
+    /// Data that can be accessed by applications.
+    PublicData = 5,
 };
 
 enum class NCASectionCryptoType : u8 {
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index 83c184750..60ea9ad12 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -67,7 +67,7 @@ std::string NACP::GetDeveloperName(Language language) const {
 }
 
 u64 NACP::GetTitleId() const {
-    return raw.title_id;
+    return raw.save_data_owner_id;
 }
 
 u64 NACP::GetDLCBaseTitleId() const {
@@ -80,11 +80,11 @@ std::string NACP::GetVersionString() const {
 }
 
 u64 NACP::GetDefaultNormalSaveSize() const {
-    return raw.normal_save_data_size;
+    return raw.user_account_save_data_size;
 }
 
 u64 NACP::GetDefaultJournalSaveSize() const {
-    return raw.journal_sava_data_size;
+    return raw.user_account_save_data_journal_size;
 }
 
 std::vector<u8> NACP::GetRawBytes() const {
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 7b9cdc910..280710ddf 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -38,23 +38,35 @@ struct RawNACP {
     u8 video_capture_mode;
     bool data_loss_confirmation;
     INSERT_PADDING_BYTES(1);
-    u64_le title_id;
+    u64_le presence_group_id;
     std::array<u8, 0x20> rating_age;
     std::array<char, 0x10> version_string;
     u64_le dlc_base_title_id;
-    u64_le title_id_2;
-    u64_le normal_save_data_size;
-    u64_le journal_sava_data_size;
-    INSERT_PADDING_BYTES(0x18);
-    u64_le product_code;
+    u64_le save_data_owner_id;
+    u64_le user_account_save_data_size;
+    u64_le user_account_save_data_journal_size;
+    u64_le device_save_data_size;
+    u64_le device_save_data_journal_size;
+    u64_le bcat_delivery_cache_storage_size;
+    char application_error_code_category[8];
     std::array<u64_le, 0x8> local_communication;
     u8 logo_type;
     u8 logo_handling;
     bool runtime_add_on_content_install;
     INSERT_PADDING_BYTES(5);
-    u64_le title_id_update;
-    std::array<u8, 0x40> bcat_passphrase;
-    INSERT_PADDING_BYTES(0xEC0);
+    u64_le seed_for_pseudo_device_id;
+    std::array<u8, 0x41> bcat_passphrase;
+    INSERT_PADDING_BYTES(7);
+    u64_le user_account_save_data_max_size;
+    u64_le user_account_save_data_max_journal_size;
+    u64_le device_save_data_max_size;
+    u64_le device_save_data_max_journal_size;
+    u64_le temporary_storage_size;
+    u64_le cache_storage_size;
+    u64_le cache_storage_journal_size;
+    u64_le cache_storage_data_and_journal_max_size;
+    u64_le cache_storage_max_index;
+    INSERT_PADDING_BYTES(0xE70);
 };
 static_assert(sizeof(RawNACP) == 0x4000, "RawNACP has incorrect size.");
 
diff --git a/src/core/file_sys/errors.h b/src/core/file_sys/errors.h
index e4a4ee4ab..bb4654366 100644
--- a/src/core/file_sys/errors.h
+++ b/src/core/file_sys/errors.h
@@ -11,6 +11,9 @@ namespace FileSys {
 constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1};
 constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002};
 constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001};
+constexpr ResultCode ERROR_OUT_OF_BOUNDS{ErrorModule::FS, 3005};
+constexpr ResultCode ERROR_FAILED_MOUNT_ARCHIVE{ErrorModule::FS, 3223};
+constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::FS, 6001};
 constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061};
 constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062};
 
diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp
index 47b7526c7..d126ae8dd 100644
--- a/src/core/file_sys/fsmitm_romfsbuild.cpp
+++ b/src/core/file_sys/fsmitm_romfsbuild.cpp
@@ -23,6 +23,7 @@
  */
 
 #include <cstring>
+#include <string_view>
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "core/file_sys/fsmitm_romfsbuild.h"
@@ -97,7 +98,8 @@ struct RomFSBuildFileContext {
     VirtualFile source;
 };
 
-static u32 romfs_calc_path_hash(u32 parent, std::string path, u32 start, std::size_t path_len) {
+static u32 romfs_calc_path_hash(u32 parent, std::string_view path, u32 start,
+                                std::size_t path_len) {
     u32 hash = parent ^ 123456789;
     for (u32 i = 0; i < path_len; i++) {
         hash = (hash >> 5) | (hash << 27);
diff --git a/src/core/file_sys/nca_metadata.cpp b/src/core/file_sys/nca_metadata.cpp
index 6f34b7836..93d0df6b9 100644
--- a/src/core/file_sys/nca_metadata.cpp
+++ b/src/core/file_sys/nca_metadata.cpp
@@ -10,14 +10,6 @@
 
 namespace FileSys {
 
-bool operator>=(TitleType lhs, TitleType rhs) {
-    return static_cast<std::size_t>(lhs) >= static_cast<std::size_t>(rhs);
-}
-
-bool operator<=(TitleType lhs, TitleType rhs) {
-    return static_cast<std::size_t>(lhs) <= static_cast<std::size_t>(rhs);
-}
-
 CNMT::CNMT(VirtualFile file) {
     if (file->ReadObject(&header) != sizeof(CNMTHeader))
         return;
diff --git a/src/core/file_sys/nca_metadata.h b/src/core/file_sys/nca_metadata.h
index a05d155f4..50bf38471 100644
--- a/src/core/file_sys/nca_metadata.h
+++ b/src/core/file_sys/nca_metadata.h
@@ -29,9 +29,6 @@ enum class TitleType : u8 {
     DeltaTitle = 0x83,
 };
 
-bool operator>=(TitleType lhs, TitleType rhs);
-bool operator<=(TitleType lhs, TitleType rhs);
-
 enum class ContentRecordType : u8 {
     Meta = 0,
     Program = 1,
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 61706966e..78dbadee3 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -7,8 +7,10 @@
 #include <cstddef>
 #include <cstring>
 
+#include "common/file_util.h"
 #include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
 #include "core/file_sys/ips_layer.h"
@@ -19,6 +21,7 @@
 #include "core/file_sys/vfs_vector.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
+#include "core/loader/nso.h"
 #include "core/settings.h"
 
 namespace FileSys {
@@ -31,14 +34,6 @@ constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
     "subsdk3", "subsdk4",   "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
 };
 
-struct NSOBuildHeader {
-    u32_le magic;
-    INSERT_PADDING_BYTES(0x3C);
-    std::array<u8, 0x20> build_id;
-    INSERT_PADDING_BYTES(0xA0);
-};
-static_assert(sizeof(NSOBuildHeader) == 0x100, "NSOBuildHeader has incorrect size.");
-
 std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     std::array<u8, sizeof(u32)> bytes{};
     bytes[0] = version % SINGLE_BYTE_MODULUS;
@@ -75,7 +70,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
         }
     }
 
-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();
 
     const auto& disabled = Settings::values.disabled_addons[title_id];
     const auto update_disabled =
@@ -161,32 +156,35 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
     return out;
 }
 
-std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
-    if (nso.size() < 0x100)
+std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso, const std::string& name) const {
+    if (nso.size() < sizeof(Loader::NSOHeader)) {
         return nso;
+    }
 
-    NSOBuildHeader header;
-    std::memcpy(&header, nso.data(), sizeof(NSOBuildHeader));
+    Loader::NSOHeader header;
+    std::memcpy(&header, nso.data(), sizeof(header));
 
-    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return nso;
+    }
 
     const auto build_id_raw = Common::HexArrayToString(header.build_id);
     const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);
 
     if (Settings::values.dump_nso) {
-        LOG_INFO(Loader, "Dumping NSO for build_id={}, title_id={:016X}", build_id, title_id);
+        LOG_INFO(Loader, "Dumping NSO for name={}, build_id={}, title_id={:016X}", name, build_id,
+                 title_id);
         const auto dump_dir = Service::FileSystem::GetModificationDumpRoot(title_id);
         if (dump_dir != nullptr) {
             const auto nso_dir = GetOrCreateDirectoryRelative(dump_dir, "/nso");
-            const auto file = nso_dir->CreateFile(fmt::format("{}.nso", build_id));
+            const auto file = nso_dir->CreateFile(fmt::format("{}-{}.nso", name, build_id));
 
             file->Resize(nso.size());
             file->WriteBytes(nso);
         }
     }
 
-    LOG_INFO(Loader, "Patching NSO for build_id={}", build_id);
+    LOG_INFO(Loader, "Patching NSO for name={}, build_id={}", name, build_id);
 
     const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
     auto patch_dirs = load_dir->GetSubdirectories();
@@ -212,9 +210,11 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
         }
     }
 
-    if (out.size() < 0x100)
+    if (out.size() < sizeof(Loader::NSOHeader)) {
         return nso;
-    std::memcpy(out.data(), &header, sizeof(NSOBuildHeader));
+    }
+
+    std::memcpy(out.data(), &header, sizeof(header));
     return out;
 }
 
@@ -232,6 +232,57 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
     return !CollectPatches(patch_dirs, build_id).empty();
 }
 
+// Looks in base_path for "<id>.txt", where <id> is the first 16 hex characters of the build ID
+// (upper presumably selects upper-case hex), and parses it; nullopt if missing or unreadable.
+static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
+                                                        const std::array<u8, 0x20>& build_id_,
+                                                        const VirtualDir& base_path, bool upper) {
+    const auto build_id = Common::HexArrayToString(build_id_, upper).substr(0, sizeof(u64) * 2);
+    const auto file = base_path->GetFile(fmt::format("{}.txt", build_id));
+    if (file == nullptr) {
+        LOG_INFO(Common_Filesystem, "No cheats file found for title_id={:016X}, build_id={}",
+                 title_id, build_id);
+        return std::nullopt;
+    }
+
+    std::vector<u8> data(file->GetSize());
+    const auto bytes_read = file->Read(data.data(), data.size());
+    if (bytes_read != data.size()) {
+        LOG_INFO(Common_Filesystem, "Failed to read cheats file for title_id={:016X}, build_id={}",
+                 title_id, build_id);
+        return std::nullopt;
+    }
+
+    return TextCheatParser{}.Parse(system, data);
+}
+
+// Builds one CheatList per mod folder (sorted by name) that has a cheats file for this build ID.
+std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
+                                                     const std::array<u8, 32>& build_id_) const {
+    const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
+    if (load_dir == nullptr) {
+        return {}; // no mod load directory for this title, so there is nothing to scan
+    }
+    auto patch_dirs = load_dir->GetSubdirectories();
+    std::sort(patch_dirs.begin(), patch_dirs.end(),
+              [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
+
+    std::vector<CheatList> out;
+    out.reserve(patch_dirs.size());
+    for (const auto& subdir : patch_dirs) {
+        if (const auto cheats_dir = subdir->GetSubdirectory("cheats"); cheats_dir != nullptr) {
+            auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
+            if (!res.has_value()) { // retry with the other hex letter case
+                res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
+            }
+            if (res.has_value()) {
+                out.push_back(std::move(*res));
+            }
+        }
+    }
+    return out;
+}
+
 static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) {
     const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
     if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
@@ -296,7 +347,7 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
     if (romfs == nullptr)
         return romfs;
 
-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();
 
     // Game Updates
     const auto update_tid = GetUpdateTitleID(title_id);
@@ -343,7 +394,7 @@ static bool IsDirValidAndNonEmpty(const VirtualDir& dir) {
 std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNames(
     VirtualFile update_raw) const {
     std::map<std::string, std::string, std::less<>> out;
-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();
     const auto& disabled = Settings::values.disabled_addons[title_id];
 
     // Game Updates
@@ -403,6 +454,8 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
             }
             if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
                 AppendCommaIfNotEmpty(types, "LayeredFS");
+            if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
+                AppendCommaIfNotEmpty(types, "Cheats");
 
             if (types.empty())
                 continue;
@@ -415,10 +468,10 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
 
     // DLC
     const auto dlc_entries = installed.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
-    std::vector<RegisteredCacheEntry> dlc_match;
+    std::vector<ContentProviderEntry> dlc_match;
     dlc_match.reserve(dlc_entries.size());
     std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
-                 [this, &installed](const RegisteredCacheEntry& entry) {
+                 [this, &installed](const ContentProviderEntry& entry) {
                      return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == title_id &&
                             installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success;
                  });
@@ -441,7 +494,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
 }
 
 std::pair<std::unique_ptr<NACP>, VirtualFile> PatchManager::GetControlMetadata() const {
-    const auto installed{Service::FileSystem::GetUnionContents()};
+    const auto& installed = Core::System::GetInstance().GetContentProvider();
 
     const auto base_control_nca = installed.GetEntry(title_id, ContentRecordType::Control);
     if (base_control_nca == nullptr)
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index b8a1652fd..769f8c6f0 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -8,9 +8,14 @@
 #include <memory>
 #include <string>
 #include "common/common_types.h"
+#include "core/file_sys/cheat_engine.h"
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/vfs.h"
 
+namespace Core {
+class System;
+}
+
 namespace FileSys {
 
 class NCA;
@@ -39,12 +44,16 @@ public:
     // Currently tracked NSO patches:
     // - IPS
     // - IPSwitch
-    std::vector<u8> PatchNSO(const std::vector<u8>& nso) const;
+    std::vector<u8> PatchNSO(const std::vector<u8>& nso, const std::string& name) const;
 
     // Checks to see if PatchNSO() will have any effect given the NSO's build ID.
     // Used to prevent expensive copies in NSO loader.
     bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;
 
+    // Creates a CheatList for every mod that ships a cheats file matching the given build ID.
+    std::vector<CheatList> CreateCheatList(const Core::System& system,
+                                           const std::array<u8, 0x20>& build_id) const;
+
     // Currently tracked RomFS patches:
     // - Game Updates
     // - LayeredFS
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index d3e00437f..d863253f8 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <cstddef>
-#include <cstring>
 #include <vector>
 
 #include "common/logging/log.h"
@@ -17,28 +16,30 @@ ProgramMetadata::ProgramMetadata() = default;
 ProgramMetadata::~ProgramMetadata() = default;
 
 Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
-    std::size_t total_size = static_cast<std::size_t>(file->GetSize());
-    if (total_size < sizeof(Header))
+    const std::size_t total_size = file->GetSize();
+    if (total_size < sizeof(Header)) {
         return Loader::ResultStatus::ErrorBadNPDMHeader;
+    }
 
-    // TODO(DarkLordZach): Use ReadObject when Header/AcidHeader becomes trivially copyable.
-    std::vector<u8> npdm_header_data = file->ReadBytes(sizeof(Header));
-    if (sizeof(Header) != npdm_header_data.size())
+    if (sizeof(Header) != file->ReadObject(&npdm_header)) {
         return Loader::ResultStatus::ErrorBadNPDMHeader;
-    std::memcpy(&npdm_header, npdm_header_data.data(), sizeof(Header));
+    }
 
-    std::vector<u8> acid_header_data = file->ReadBytes(sizeof(AcidHeader), npdm_header.acid_offset);
-    if (sizeof(AcidHeader) != acid_header_data.size())
+    if (sizeof(AcidHeader) != file->ReadObject(&acid_header, npdm_header.acid_offset)) {
         return Loader::ResultStatus::ErrorBadACIDHeader;
-    std::memcpy(&acid_header, acid_header_data.data(), sizeof(AcidHeader));
+    }
 
-    if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset))
+    if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset)) {
         return Loader::ResultStatus::ErrorBadACIHeader;
+    }
 
-    if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset))
+    if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) {
         return Loader::ResultStatus::ErrorBadFileAccessControl;
-    if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset))
+    }
+
+    if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) {
         return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
 
     aci_kernel_capabilities.resize(aci_header.kac_size / sizeof(u32));
     const u64 read_size = aci_header.kac_size;
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 0033ba347..7de5b9cf9 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,7 +58,6 @@ public:
     void Print() const;
 
 private:
-    // TODO(DarkLordZach): BitField is not trivially copyable.
     struct Header {
         std::array<char, 4> magic;
         std::array<u8, 8> reserved;
@@ -85,7 +84,6 @@ private:
 
     static_assert(sizeof(Header) == 0x80, "NPDM header structure size is wrong");
 
-    // TODO(DarkLordZach): BitField is not trivially copyable.
     struct AcidHeader {
         std::array<u8, 0x100> signature;
         std::array<u8, 0x100> nca_modulus;
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 128199063..3946ff871 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -23,19 +23,19 @@ namespace FileSys {
 // The size of blocks to use when vfs raw copying into nand.
 constexpr size_t VFS_RC_LARGE_COPY_BLOCK = 0x400000;
 
-std::string RegisteredCacheEntry::DebugInfo() const {
+std::string ContentProviderEntry::DebugInfo() const {
     return fmt::format("title_id={:016X}, content_type={:02X}", title_id, static_cast<u8>(type));
 }
 
-bool operator<(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) {
+bool operator<(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
     return (lhs.title_id < rhs.title_id) || (lhs.title_id == rhs.title_id && lhs.type < rhs.type);
 }
 
-bool operator==(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) {
+bool operator==(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
     return std::tie(lhs.title_id, lhs.type) == std::tie(rhs.title_id, rhs.type);
 }
 
-bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) {
+bool operator!=(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
     return !operator==(lhs, rhs);
 }
 
@@ -84,7 +84,7 @@ static std::string GetCNMTName(TitleType type, u64 title_id) {
     return fmt::format("{}_{:016x}.cnmt", TITLE_TYPE_NAMES[index], title_id);
 }
 
-static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
+ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
     switch (type) {
     case NCAContentType::Program:
         // TODO(DarkLordZach): Differentiate between Program and Patch
@@ -94,7 +94,7 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
     case NCAContentType::Control:
         return ContentRecordType::Control;
     case NCAContentType::Data:
-    case NCAContentType::Data_Unknown5:
+    case NCAContentType::PublicData:
         return ContentRecordType::Data;
     case NCAContentType::Manual:
         // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
@@ -104,6 +104,28 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
     }
 }
 
+ContentProvider::~ContentProvider() = default;
+
+bool ContentProvider::HasEntry(ContentProviderEntry entry) const {
+    return HasEntry(entry.title_id, entry.type);
+}
+
+VirtualFile ContentProvider::GetEntryUnparsed(ContentProviderEntry entry) const {
+    return GetEntryUnparsed(entry.title_id, entry.type);
+}
+
+VirtualFile ContentProvider::GetEntryRaw(ContentProviderEntry entry) const {
+    return GetEntryRaw(entry.title_id, entry.type);
+}
+
+std::unique_ptr<NCA> ContentProvider::GetEntry(ContentProviderEntry entry) const {
+    return GetEntry(entry.title_id, entry.type);
+}
+
+std::vector<ContentProviderEntry> ContentProvider::ListEntries() const {
+    return ListEntriesFilter(std::nullopt, std::nullopt, std::nullopt);
+}
+
 VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir,
                                                        std::string_view path) const {
     const auto file = dir->GetFileRelative(path);
@@ -161,8 +183,8 @@ VirtualFile RegisteredCache::GetFileAtID(NcaID id) const {
     return file;
 }
 
-static std::optional<NcaID> CheckMapForContentRecord(
-    const boost::container::flat_map<u64, CNMT>& map, u64 title_id, ContentRecordType type) {
+static std::optional<NcaID> CheckMapForContentRecord(const std::map<u64, CNMT>& map, u64 title_id,
+                                                     ContentRecordType type) {
     if (map.find(title_id) == map.end())
         return {};
 
@@ -268,7 +290,7 @@ void RegisteredCache::Refresh() {
     AccumulateYuzuMeta();
 }
 
-RegisteredCache::RegisteredCache(VirtualDir dir_, RegisteredCacheParsingFunction parsing_function)
+RegisteredCache::RegisteredCache(VirtualDir dir_, ContentProviderParsingFunction parsing_function)
     : dir(std::move(dir_)), parser(std::move(parsing_function)) {
     Refresh();
 }
@@ -279,19 +301,11 @@ bool RegisteredCache::HasEntry(u64 title_id, ContentRecordType type) const {
     return GetEntryRaw(title_id, type) != nullptr;
 }
 
-bool RegisteredCache::HasEntry(RegisteredCacheEntry entry) const {
-    return GetEntryRaw(entry) != nullptr;
-}
-
 VirtualFile RegisteredCache::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
     const auto id = GetNcaIDFromMetadata(title_id, type);
     return id ? GetFileAtID(*id) : nullptr;
 }
 
-VirtualFile RegisteredCache::GetEntryUnparsed(RegisteredCacheEntry entry) const {
-    return GetEntryUnparsed(entry.title_id, entry.type);
-}
-
 std::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
     const auto meta_iter = meta.find(title_id);
     if (meta_iter != meta.end())
@@ -309,10 +323,6 @@ VirtualFile RegisteredCache::GetEntryRaw(u64 title_id, ContentRecordType type) c
     return id ? parser(GetFileAtID(*id), *id) : nullptr;
 }
 
-VirtualFile RegisteredCache::GetEntryRaw(RegisteredCacheEntry entry) const {
-    return GetEntryRaw(entry.title_id, entry.type);
-}
-
 std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType type) const {
     const auto raw = GetEntryRaw(title_id, type);
     if (raw == nullptr)
@@ -320,10 +330,6 @@ std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType t
     return std::make_unique<NCA>(raw, nullptr, 0, keys);
 }
 
-std::unique_ptr<NCA> RegisteredCache::GetEntry(RegisteredCacheEntry entry) const {
-    return GetEntry(entry.title_id, entry.type);
-}
-
 template <typename T>
 void RegisteredCache::IterateAllMetadata(
     std::vector<T>& out, std::function<T(const CNMT&, const ContentRecord&)> proc,
@@ -348,25 +354,14 @@ void RegisteredCache::IterateAllMetadata(
     }
 }
 
-std::vector<RegisteredCacheEntry> RegisteredCache::ListEntries() const {
-    std::vector<RegisteredCacheEntry> out;
-    IterateAllMetadata<RegisteredCacheEntry>(
-        out,
-        [](const CNMT& c, const ContentRecord& r) {
-            return RegisteredCacheEntry{c.GetTitleID(), r.type};
-        },
-        [](const CNMT& c, const ContentRecord& r) { return true; });
-    return out;
-}
-
-std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
+std::vector<ContentProviderEntry> RegisteredCache::ListEntriesFilter(
     std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
     std::optional<u64> title_id) const {
-    std::vector<RegisteredCacheEntry> out;
-    IterateAllMetadata<RegisteredCacheEntry>(
+    std::vector<ContentProviderEntry> out;
+    IterateAllMetadata<ContentProviderEntry>(
         out,
         [](const CNMT& c, const ContentRecord& r) {
-            return RegisteredCacheEntry{c.GetTitleID(), r.type};
+            return ContentProviderEntry{c.GetTitleID(), r.type};
         },
         [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
             if (title_type && *title_type != c.GetType())
@@ -521,37 +516,56 @@ bool RegisteredCache::RawInstallYuzuMeta(const CNMT& cnmt) {
                         }) != yuzu_meta.end();
 }
 
-RegisteredCacheUnion::RegisteredCacheUnion(std::vector<RegisteredCache*> caches)
-    : caches(std::move(caches)) {}
+ContentProviderUnion::~ContentProviderUnion() = default;
 
-void RegisteredCacheUnion::Refresh() {
-    for (const auto& c : caches)
-        c->Refresh();
+void ContentProviderUnion::SetSlot(ContentProviderUnionSlot slot, ContentProvider* provider) {
+    providers[slot] = provider;
 }
 
-bool RegisteredCacheUnion::HasEntry(u64 title_id, ContentRecordType type) const {
-    return std::any_of(caches.begin(), caches.end(), [title_id, type](const auto& cache) {
-        return cache->HasEntry(title_id, type);
-    });
+void ContentProviderUnion::ClearSlot(ContentProviderUnionSlot slot) {
+    providers[slot] = nullptr;
 }
 
-bool RegisteredCacheUnion::HasEntry(RegisteredCacheEntry entry) const {
-    return HasEntry(entry.title_id, entry.type);
+void ContentProviderUnion::Refresh() {
+    for (auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        provider.second->Refresh();
+    }
 }
 
-std::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const {
-    for (const auto& c : caches) {
-        const auto res = c->GetEntryVersion(title_id);
-        if (res)
+// Reports whether any registered provider holds the given title/content-type pair.
+// Cleared slots (null pointers) are ignored.
+bool ContentProviderUnion::HasEntry(u64 title_id, ContentRecordType type) const {
+    const auto owns_entry = [title_id, type](const auto& slot) {
+        const ContentProvider* const source = slot.second;
+        return source != nullptr && source->HasEntry(title_id, type);
+    };
+
+    // any_of stops at the first hit, exactly like an early return out of a loop.
+    return std::any_of(providers.begin(), providers.end(), owns_entry);
+}
+
+std::optional<u32> ContentProviderUnion::GetEntryVersion(u64 title_id) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        const auto res = provider.second->GetEntryVersion(title_id);
+        if (res != std::nullopt)
             return res;
     }
 
-    return {};
+    return std::nullopt;
 }
 
-VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
-    for (const auto& c : caches) {
-        const auto res = c->GetEntryUnparsed(title_id, type);
+VirtualFile ContentProviderUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        const auto res = provider.second->GetEntryUnparsed(title_id, type);
         if (res != nullptr)
             return res;
     }
@@ -559,13 +573,12 @@ VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordTy
     return nullptr;
 }
 
-VirtualFile RegisteredCacheUnion::GetEntryUnparsed(RegisteredCacheEntry entry) const {
-    return GetEntryUnparsed(entry.title_id, entry.type);
-}
+VirtualFile ContentProviderUnion::GetEntryRaw(u64 title_id, ContentRecordType type) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
 
-VirtualFile RegisteredCacheUnion::GetEntryRaw(u64 title_id, ContentRecordType type) const {
-    for (const auto& c : caches) {
-        const auto res = c->GetEntryRaw(title_id, type);
+        const auto res = provider.second->GetEntryRaw(title_id, type);
         if (res != nullptr)
             return res;
     }
@@ -573,30 +586,56 @@ VirtualFile RegisteredCacheUnion::GetEntryRaw(u64 title_id, ContentRecordType ty
     return nullptr;
 }
 
-VirtualFile RegisteredCacheUnion::GetEntryRaw(RegisteredCacheEntry entry) const {
-    return GetEntryRaw(entry.title_id, entry.type);
-}
+std::unique_ptr<NCA> ContentProviderUnion::GetEntry(u64 title_id, ContentRecordType type) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
 
-std::unique_ptr<NCA> RegisteredCacheUnion::GetEntry(u64 title_id, ContentRecordType type) const {
-    const auto raw = GetEntryRaw(title_id, type);
-    if (raw == nullptr)
-        return nullptr;
-    return std::make_unique<NCA>(raw);
+        auto res = provider.second->GetEntry(title_id, type);
+        if (res != nullptr)
+            return res;
+    }
+
+    return nullptr;
 }
 
-std::unique_ptr<NCA> RegisteredCacheUnion::GetEntry(RegisteredCacheEntry entry) const {
-    return GetEntry(entry.title_id, entry.type);
+// Merges the filtered listings of all occupied slots into one sorted, duplicate-free vector.
+std::vector<ContentProviderEntry> ContentProviderUnion::ListEntriesFilter(
+    std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+    std::optional<u64> title_id) const {
+    std::vector<ContentProviderEntry> merged;
+
+    for (const auto& slot : providers) {
+        if (slot.second != nullptr) {
+            const auto part = slot.second->ListEntriesFilter(title_type, record_type, title_id);
+            merged.insert(merged.end(), part.begin(), part.end());
+        }
+    }
+    // Sort first so equal entries become adjacent, which is what std::unique needs.
+    std::sort(merged.begin(), merged.end());
+    merged.erase(std::unique(merged.begin(), merged.end()), merged.end());
+    return merged;
 }
 
-std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
-    std::vector<RegisteredCacheEntry> out;
-    for (const auto& c : caches) {
-        c->IterateAllMetadata<RegisteredCacheEntry>(
-            out,
-            [](const CNMT& c, const ContentRecord& r) {
-                return RegisteredCacheEntry{c.GetTitleID(), r.type};
-            },
-            [](const CNMT& c, const ContentRecord& r) { return true; });
+std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>>
+ContentProviderUnion::ListEntriesFilterOrigin(std::optional<ContentProviderUnionSlot> origin,
+                                              std::optional<TitleType> title_type,
+                                              std::optional<ContentRecordType> record_type,
+                                              std::optional<u64> title_id) const {
+    std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> out;
+
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        if (origin.has_value() && *origin != provider.first)
+            continue;
+
+        const auto vec = provider.second->ListEntriesFilter(title_type, record_type, title_id);
+        std::transform(vec.begin(), vec.end(), std::back_inserter(out),
+                       [&provider](const ContentProviderEntry& entry) {
+                           return std::make_pair(provider.first, entry);
+                       });
     }
 
     std::sort(out.begin(), out.end());
@@ -604,25 +643,61 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
     return out;
 }
 
-std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter(
+ManualContentProvider::~ManualContentProvider() = default;
+
+void ManualContentProvider::AddEntry(TitleType title_type, ContentRecordType content_type,
+                                     u64 title_id, VirtualFile file) { // file: moved into map
+    entries.insert_or_assign({title_type, content_type, title_id}, std::move(file));
+}
+
+void ManualContentProvider::ClearAllEntries() {
+    entries.clear();
+}
+
+void ManualContentProvider::Refresh() {}
+
+bool ManualContentProvider::HasEntry(u64 title_id, ContentRecordType type) const {
+    return GetEntryRaw(title_id, type) != nullptr;
+}
+
+std::optional<u32> ManualContentProvider::GetEntryVersion(u64 title_id) const {
+    return std::nullopt;
+}
+
+VirtualFile ManualContentProvider::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
+    return GetEntryRaw(title_id, type);
+}
+
+// Looks up the file registered for a title ID / content type pair; nullptr if there is none.
+VirtualFile ManualContentProvider::GetEntryRaw(u64 title_id, ContentRecordType type) const {
+    // The title type half of the key is not part of the query, so the map is scanned in key
+    // order and the first entry matching the other two fields wins.
+    for (const auto& [key, file] : entries) {
+        if (std::get<ContentRecordType>(key) == type && std::get<u64>(key) == title_id)
+            return file;
+    }
+    return nullptr;
+}
+
+std::unique_ptr<NCA> ManualContentProvider::GetEntry(u64 title_id, ContentRecordType type) const {
+    // Construct an NCA over the raw file with the inherited KeyManager; no file means no NCA.
+    if (const auto raw = GetEntryRaw(title_id, type); raw != nullptr)
+        return std::make_unique<NCA>(raw, nullptr, 0, keys);
+    return nullptr;
+}
+
+std::vector<ContentProviderEntry> ManualContentProvider::ListEntriesFilter(
     std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
     std::optional<u64> title_id) const {
-    std::vector<RegisteredCacheEntry> out;
-    for (const auto& c : caches) {
-        c->IterateAllMetadata<RegisteredCacheEntry>(
-            out,
-            [](const CNMT& c, const ContentRecord& r) {
-                return RegisteredCacheEntry{c.GetTitleID(), r.type};
-            },
-            [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
-                if (title_type && *title_type != c.GetType())
-                    return false;
-                if (record_type && *record_type != r.type)
-                    return false;
-                if (title_id && *title_id != c.GetTitleID())
-                    return false;
-                return true;
-            });
+    std::vector<ContentProviderEntry> out;
+
+    for (const auto& entry : entries) {
+        const auto [e_title_type, e_content_type, e_title_id] = entry.first;
+        if ((title_type == std::nullopt || e_title_type == *title_type) &&
+            (record_type == std::nullopt || e_content_type == *record_type) &&
+            (title_id == std::nullopt || e_title_id == *title_id)) {
+            out.emplace_back(ContentProviderEntry{e_title_id, e_content_type});
+        }
     }
 
     std::sort(out.begin(), out.end());
diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h
index 3b77af4e0..ec9052653 100644
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -21,12 +21,13 @@ class NSP;
 class XCI;
 
 enum class ContentRecordType : u8;
+enum class NCAContentType : u8;
 enum class TitleType : u8;
 
 struct ContentRecord;
 
 using NcaID = std::array<u8, 0x10>;
-using RegisteredCacheParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>;
+using ContentProviderParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>;
 using VfsCopyFunction = std::function<bool(const VirtualFile&, const VirtualFile&, size_t)>;
 
 enum class InstallResult {
@@ -36,7 +37,7 @@ enum class InstallResult {
     ErrorMetaFailed,
 };
 
-struct RegisteredCacheEntry {
+struct ContentProviderEntry {
     u64 title_id;
     ContentRecordType type;
 
@@ -47,12 +48,46 @@ constexpr u64 GetUpdateTitleID(u64 base_title_id) {
     return base_title_id | 0x800;
 }
 
+ContentRecordType GetCRTypeFromNCAType(NCAContentType type);
+
 // boost flat_map requires operator< for O(log(n)) lookups.
-bool operator<(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs);
+bool operator<(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
 
 // std unique requires operator== to identify duplicates.
-bool operator==(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs);
-bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs);
+bool operator==(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
+bool operator!=(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
+
+class ContentProvider {
+public:
+    virtual ~ContentProvider();
+
+    virtual void Refresh() = 0;
+
+    virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0;
+    virtual bool HasEntry(ContentProviderEntry entry) const;
+
+    virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0;
+
+    virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0;
+    virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
+
+    virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0;
+    virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
+
+    virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0;
+    virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
+
+    virtual std::vector<ContentProviderEntry> ListEntries() const;
+
+    // Every parameter that is not std::nullopt limits the result to the entries that match it.
+    virtual std::vector<ContentProviderEntry> ListEntriesFilter(
+        std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
+        std::optional<u64> title_id = {}) const = 0;
+
+protected:
+    // A single instance of KeyManager to be used by GetEntry()
+    Core::Crypto::KeyManager keys;
+};
 
 /*
  * A class that catalogues NCAs in the registered directory structure.
@@ -67,39 +102,32 @@ bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs
  * (This impl also supports substituting the nca dir for an nca file, as that's more convenient
  * when 4GB splitting can be ignored.)
  */
-class RegisteredCache {
-    friend class RegisteredCacheUnion;
-
+class RegisteredCache : public ContentProvider {
 public:
     // Parsing function defines the conversion from raw file to NCA. If there are other steps
     // besides creating the NCA from the file (e.g. NAX0 on SD Card), that should go in a custom
     // parsing function.
     explicit RegisteredCache(VirtualDir dir,
-                             RegisteredCacheParsingFunction parsing_function =
+                             ContentProviderParsingFunction parsing_function =
                                  [](const VirtualFile& file, const NcaID& id) { return file; });
-    ~RegisteredCache();
+    ~RegisteredCache() override;
 
-    void Refresh();
+    void Refresh() override;
 
-    bool HasEntry(u64 title_id, ContentRecordType type) const;
-    bool HasEntry(RegisteredCacheEntry entry) const;
+    bool HasEntry(u64 title_id, ContentRecordType type) const override;
 
-    std::optional<u32> GetEntryVersion(u64 title_id) const;
+    std::optional<u32> GetEntryVersion(u64 title_id) const override;
 
-    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
+    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
 
-    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryRaw(RegisteredCacheEntry entry) const;
+    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
 
-    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const;
-    std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
+    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
 
-    std::vector<RegisteredCacheEntry> ListEntries() const;
     // If a parameter is not std::nullopt, it will be filtered for from all entries.
-    std::vector<RegisteredCacheEntry> ListEntriesFilter(
+    std::vector<ContentProviderEntry> ListEntriesFilter(
         std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
-        std::optional<u64> title_id = {}) const;
+        std::optional<u64> title_id = {}) const override;
 
     // Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure
     // there is a meta NCA and all of them are accessible.
@@ -131,46 +159,70 @@ private:
     bool RawInstallYuzuMeta(const CNMT& cnmt);
 
     VirtualDir dir;
-    RegisteredCacheParsingFunction parser;
-    Core::Crypto::KeyManager keys;
+    ContentProviderParsingFunction parser;
 
     // maps tid -> NcaID of meta
-    boost::container::flat_map<u64, NcaID> meta_id;
+    std::map<u64, NcaID> meta_id;
     // maps tid -> meta
-    boost::container::flat_map<u64, CNMT> meta;
+    std::map<u64, CNMT> meta;
     // maps tid -> meta for CNMT in yuzu_meta
-    boost::container::flat_map<u64, CNMT> yuzu_meta;
+    std::map<u64, CNMT> yuzu_meta;
 };
 
-// Combines multiple RegisteredCaches (i.e. SysNAND, UserNAND, SDMC) into one interface.
-class RegisteredCacheUnion {
-public:
-    explicit RegisteredCacheUnion(std::vector<RegisteredCache*> caches);
-
-    void Refresh();
-
-    bool HasEntry(u64 title_id, ContentRecordType type) const;
-    bool HasEntry(RegisteredCacheEntry entry) const;
-
-    std::optional<u32> GetEntryVersion(u64 title_id) const;
-
-    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
-
-    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryRaw(RegisteredCacheEntry entry) const;
-
-    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const;
-    std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
+enum class ContentProviderUnionSlot {
+    SysNAND,        ///< System NAND
+    UserNAND,       ///< User NAND
+    SDMC,           ///< SD Card
+    FrontendManual, ///< Frontend-defined game list or similar
+};
 
-    std::vector<RegisteredCacheEntry> ListEntries() const;
-    // If a parameter is not std::nullopt, it will be filtered for from all entries.
-    std::vector<RegisteredCacheEntry> ListEntriesFilter(
+// Combines multiple ContentProviders (e.g. SysNAND, UserNAND, SDMC) into one interface.
+class ContentProviderUnion : public ContentProvider {
+public:
+    ~ContentProviderUnion() override;
+
+    void SetSlot(ContentProviderUnionSlot slot, ContentProvider* provider);
+    void ClearSlot(ContentProviderUnionSlot slot);
+
+    void Refresh() override;
+    bool HasEntry(u64 title_id, ContentRecordType type) const override;
+    std::optional<u32> GetEntryVersion(u64 title_id) const override;
+    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
+    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
+    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
+    std::vector<ContentProviderEntry> ListEntriesFilter(
+        std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+        std::optional<u64> title_id) const override;
+
+    std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> ListEntriesFilterOrigin(
+        std::optional<ContentProviderUnionSlot> origin = {},
         std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
         std::optional<u64> title_id = {}) const;
 
 private:
-    std::vector<RegisteredCache*> caches;
+    std::map<ContentProviderUnionSlot, ContentProvider*> providers;
+};
+
+class ManualContentProvider : public ContentProvider {
+public:
+    ~ManualContentProvider() override;
+
+    void AddEntry(TitleType title_type, ContentRecordType content_type, u64 title_id,
+                  VirtualFile file);
+    void ClearAllEntries();
+
+    void Refresh() override;
+    bool HasEntry(u64 title_id, ContentRecordType type) const override;
+    std::optional<u32> GetEntryVersion(u64 title_id) const override;
+    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
+    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
+    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
+    std::vector<ContentProviderEntry> ListEntriesFilter(
+        std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+        std::optional<u64> title_id) const override;
+
+private:
+    std::map<std::tuple<TitleType, ContentRecordType, u64>, VirtualFile> entries;
 };
 
 } // namespace FileSys
diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp
index 6ad1e4f86..b2ccb2926 100644
--- a/src/core/file_sys/romfs_factory.cpp
+++ b/src/core/file_sys/romfs_factory.cpp
@@ -48,7 +48,7 @@ ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage, Conte
 
     switch (storage) {
     case StorageId::None:
-        res = Service::FileSystem::GetUnionContents().GetEntry(title_id, type);
+        res = Core::System::GetInstance().GetContentProvider().GetEntry(title_id, type);
         break;
     case StorageId::NandSystem:
         res = Service::FileSystem::GetSystemNANDContents()->GetEntry(title_id, type);
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index 1913dc956..7974b031d 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -16,8 +16,10 @@ namespace FileSys {
 constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size";
 
 std::string SaveDataDescriptor::DebugInfo() const {
-    return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}]",
-                       static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id);
+    return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}, "
+                       "rank={}, index={}]",
+                       static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id,
+                       static_cast<u8>(rank), index);
 }
 
 SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save_directory)) {
@@ -28,7 +30,7 @@ SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save
 
 SaveDataFactory::~SaveDataFactory() = default;
 
-ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescriptor meta) {
+ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, const SaveDataDescriptor& meta) {
     if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) {
         if (meta.zero_1 != 0) {
             LOG_WARNING(Service_FS,
diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h
index 3a1caf292..b73654571 100644
--- a/src/core/file_sys/savedata_factory.h
+++ b/src/core/file_sys/savedata_factory.h
@@ -32,12 +32,19 @@ enum class SaveDataType : u8 {
     CacheStorage = 5,
 };
 
+enum class SaveDataRank : u8 {
+    Primary,
+    Secondary,
+};
+
 struct SaveDataDescriptor {
     u64_le title_id;
     u128 user_id;
     u64_le save_id;
     SaveDataType type;
-    INSERT_PADDING_BYTES(7);
+    SaveDataRank rank;
+    u16_le index;
+    INSERT_PADDING_BYTES(4);
     u64_le zero_1;
     u64_le zero_2;
     u64_le zero_3;
@@ -57,7 +64,7 @@ public:
     explicit SaveDataFactory(VirtualDir dir);
     ~SaveDataFactory();
 
-    ResultVal<VirtualDir> Open(SaveDataSpaceId space, SaveDataDescriptor meta);
+    ResultVal<VirtualDir> Open(SaveDataSpaceId space, const SaveDataDescriptor& meta);
 
     VirtualDir GetSaveDataSpaceDirectory(SaveDataSpaceId space) const;
 
diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp
index e1a4210db..c69caae0f 100644
--- a/src/core/file_sys/submission_package.cpp
+++ b/src/core/file_sys/submission_package.cpp
@@ -143,11 +143,12 @@ std::multimap<u64, std::shared_ptr<NCA>> NSP::GetNCAsByTitleID() const {
     return out;
 }
 
-std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> NSP::GetNCAs() const {
+std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>>
+NSP::GetNCAs() const {
     return ncas;
 }
 
-std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type) const {
+std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type, TitleType title_type) const {
     if (extracted)
         LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
 
@@ -155,14 +156,14 @@ std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type) const {
     if (title_id_iter == ncas.end())
         return nullptr;
 
-    const auto type_iter = title_id_iter->second.find(type);
+    const auto type_iter = title_id_iter->second.find({title_type, type});
     if (type_iter == title_id_iter->second.end())
         return nullptr;
 
     return type_iter->second;
 }
 
-VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type) const {
+VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type, TitleType title_type) const {
     if (extracted)
         LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
     const auto nca = GetNCA(title_id, type);
@@ -240,7 +241,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
             const CNMT cnmt(inner_file);
             auto& ncas_title = ncas[cnmt.GetTitleID()];
 
-            ncas_title[ContentRecordType::Meta] = nca;
+            ncas_title[{cnmt.GetType(), ContentRecordType::Meta}] = nca;
             for (const auto& rec : cnmt.GetContentRecords()) {
                 const auto id_string = Common::HexArrayToString(rec.nca_id, false);
                 const auto next_file = pfs->GetFile(fmt::format("{}.nca", id_string));
@@ -258,7 +259,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
                 if (next_nca->GetStatus() == Loader::ResultStatus::Success ||
                     (next_nca->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS &&
                      (cnmt.GetTitleID() & 0x800) != 0)) {
-                    ncas_title[rec.type] = std::move(next_nca);
+                    ncas_title[{cnmt.GetType(), rec.type}] = std::move(next_nca);
                 }
             }
 
diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h
index 9a28ed5bb..ee9b6ce17 100644
--- a/src/core/file_sys/submission_package.h
+++ b/src/core/file_sys/submission_package.h
@@ -42,9 +42,12 @@ public:
     // Type 0 Only (Collection of NCAs + Certificate + Ticket + Meta XML)
     std::vector<std::shared_ptr<NCA>> GetNCAsCollapsed() const;
     std::multimap<u64, std::shared_ptr<NCA>> GetNCAsByTitleID() const;
-    std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> GetNCAs() const;
-    std::shared_ptr<NCA> GetNCA(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetNCAFile(u64 title_id, ContentRecordType type) const;
+    std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> GetNCAs()
+        const;
+    std::shared_ptr<NCA> GetNCA(u64 title_id, ContentRecordType type,
+                                TitleType title_type = TitleType::Application) const;
+    VirtualFile GetNCAFile(u64 title_id, ContentRecordType type,
+                           TitleType title_type = TitleType::Application) const;
     std::vector<Core::Crypto::Key128> GetTitlekey() const;
 
     std::vector<VirtualFile> GetFiles() const override;
@@ -67,7 +70,7 @@ private:
 
     std::shared_ptr<PartitionFilesystem> pfs;
     // Map title id -> {map type -> NCA}
-    std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> ncas;
+    std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> ncas;
     std::vector<VirtualFile> ticket_files;
 
     Core::Crypto::KeyManager keys;
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index e3e79f40a..c9722ed77 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -6,6 +6,7 @@
 #include "core/file_sys/romfs.h"
 #include "core/file_sys/system_archive/ng_word.h"
 #include "core/file_sys/system_archive/system_archive.h"
+#include "core/file_sys/system_archive/system_version.h"
 
 namespace FileSys::SystemArchive {
 
@@ -30,7 +31,7 @@ constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHI
     {0x0100000000000806, "NgWord", &NgWord1},
     {0x0100000000000807, "SsidList", nullptr},
     {0x0100000000000808, "Dictionary", nullptr},
-    {0x0100000000000809, "SystemVersion", nullptr},
+    {0x0100000000000809, "SystemVersion", &SystemVersion},
     {0x010000000000080A, "AvatarImage", nullptr},
     {0x010000000000080B, "LocalNews", nullptr},
     {0x010000000000080C, "Eula", nullptr},
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
new file mode 100644
index 000000000..6e22f97b0
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/file_sys/system_archive/system_version.h"
+#include "core/file_sys/vfs_vector.h"
+
+namespace FileSys::SystemArchive {
+
+namespace SystemVersionData {
+
+// This section should reflect the best system version to describe yuzu's HLE api.
+// TODO(DarkLordZach): Update when HLE gets better.
+
+constexpr u8 VERSION_MAJOR = 5;
+constexpr u8 VERSION_MINOR = 1;
+constexpr u8 VERSION_MICRO = 0;
+
+constexpr u8 REVISION_MAJOR = 3;
+constexpr u8 REVISION_MINOR = 0;
+
+constexpr char PLATFORM_STRING[] = "NX";
+constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
+constexpr char DISPLAY_VERSION[] = "5.1.0";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
+
+} // namespace SystemVersionData
+
+std::string GetLongDisplayVersion() {
+    return SystemVersionData::DISPLAY_TITLE;
+}
+
+VirtualDir SystemVersion() { // Returns dir "data" containing "file" (0x100-element u8 vector).
+    VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
+    file->WriteObject(SystemVersionData::VERSION_MAJOR, 0); // 2nd arg: presumably byte offset
+    file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
+    file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
+    file->WriteObject(SystemVersionData::REVISION_MAJOR, 4); // note: 3 is not written here
+    file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
+    file->WriteArray(SystemVersionData::PLATFORM_STRING, // sizeof() counts the trailing NUL
+                     std::min<u64>(sizeof(SystemVersionData::PLATFORM_STRING), 0x20ULL), 0x8);
+    file->WriteArray(SystemVersionData::VERSION_HASH, // length capped at 0x40
+                     std::min<u64>(sizeof(SystemVersionData::VERSION_HASH), 0x40ULL), 0x28);
+    file->WriteArray(SystemVersionData::DISPLAY_VERSION, // length capped at 0x18
+                     std::min<u64>(sizeof(SystemVersionData::DISPLAY_VERSION), 0x18ULL), 0x68);
+    file->WriteArray(SystemVersionData::DISPLAY_TITLE, // length capped at 0x80
+                     std::min<u64>(sizeof(SystemVersionData::DISPLAY_TITLE), 0x80ULL), 0x80);
+    return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
+                                                std::vector<VirtualDir>{}, "data");
+}
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_version.h b/src/core/file_sys/system_archive/system_version.h
new file mode 100644
index 000000000..deed79b26
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.h
@@ -0,0 +1,16 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include "core/file_sys/vfs_types.h"
+
+namespace FileSys::SystemArchive {
+
+std::string GetLongDisplayVersion();
+
+VirtualDir SystemVersion();
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 515626658..75fc04302 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
     if (offset + length > data.size())
         data.resize(offset + length);
     const auto write = std::min(length, data.size() - offset);
-    std::memcpy(data.data(), data_, write);
+    std::memcpy(data.data() + offset, data_, write);
     return write;
 }
 
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..1320bbe77 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -30,7 +30,7 @@ private:
         explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
         std::tuple<float, float, bool> GetStatus() const override {
             if (auto state = touch_state.lock()) {
-                std::lock_guard<std::mutex> guard(state->mutex);
+                std::lock_guard guard{state->mutex};
                 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
             }
             return std::make_tuple(0.0f, 0.0f, false);
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
             framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
 }
 
-std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
+std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
     new_x = std::max(new_x, framebuffer_layout.screen.left);
     new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
 
@@ -81,7 +81,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
     if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
         return;
 
-    std::lock_guard<std::mutex> guard(touch_state->mutex);
+    std::lock_guard guard{touch_state->mutex};
     touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
                            (framebuffer_layout.screen.right - framebuffer_layout.screen.left);
     touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
@@ -91,7 +91,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
 }
 
 void EmuWindow::TouchReleased() {
-    std::lock_guard<std::mutex> guard(touch_state->mutex);
+    std::lock_guard guard{touch_state->mutex};
     touch_state->touch_pressed = false;
     touch_state->touch_x = 0;
     touch_state->touch_y = 0;
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 75c2be4ae..70a522556 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -187,7 +187,7 @@ private:
     /**
      * Clip the provided coordinates to be inside the touchscreen area.
      */
-    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
+    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
 };
 
 } // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
 
 // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
 template <class T>
-static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
-                                           float screen_aspect_ratio) {
+static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
+                                         float screen_aspect_ratio) {
     float scale = std::min(static_cast<float>(window_area.GetWidth()),
                            window_area.GetHeight() / screen_aspect_ratio);
-    return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
-                                  static_cast<T>(std::round(scale * screen_aspect_ratio))};
+    return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
+                                static_cast<T>(std::round(scale * screen_aspect_ratio))};
 }
 
 FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
 
     const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
                                        ScreenUndocked::Width};
-    MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
-    MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
+    Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
+    Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
 
     float window_aspect_ratio = static_cast<float>(height) / width;
 
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
     unsigned width{ScreenUndocked::Width};
     unsigned height{ScreenUndocked::Height};
 
-    MathUtil::Rectangle<unsigned> screen;
+    Common::Rectangle<unsigned> screen;
 
     /**
      * Returns the ration of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
  *   Orientation is determined by right-hand rule.
  *   Units: deg/sec
  */
-using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
+using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
 
 /**
  * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/frontend/scope_acquire_window_context.cpp b/src/core/frontend/scope_acquire_window_context.cpp
new file mode 100644
index 000000000..3663dad17
--- /dev/null
+++ b/src/core/frontend/scope_acquire_window_context.cpp
@@ -0,0 +1,18 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/frontend/emu_window.h"
+#include "core/frontend/scope_acquire_window_context.h"
+
+namespace Core::Frontend {
+
+ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_)
+    : emu_window{emu_window_} {
+    emu_window.MakeCurrent();
+}
+ScopeAcquireWindowContext::~ScopeAcquireWindowContext() {
+    emu_window.DoneCurrent();
+}
+
+} // namespace Core::Frontend
diff --git a/src/core/frontend/scope_acquire_window_context.h b/src/core/frontend/scope_acquire_window_context.h
new file mode 100644
index 000000000..2d9f6e825
--- /dev/null
+++ b/src/core/frontend/scope_acquire_window_context.h
@@ -0,0 +1,23 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Core::Frontend {
+
+class EmuWindow;
+
+/// Helper class to acquire/release window context within a given scope
+class ScopeAcquireWindowContext : NonCopyable {
+public:
+    explicit ScopeAcquireWindowContext(Core::Frontend::EmuWindow& window);
+    ~ScopeAcquireWindowContext();
+
+private:
+    Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace Core::Frontend
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index a1cad4fcb..afa812598 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -507,8 +507,11 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {
 
     LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
               bp->second.len, bp->second.addr, static_cast<int>(type));
-    Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
-    Core::System::GetInstance().InvalidateCpuInstructionCaches();
+
+    if (type == BreakpointType::Execute) {
+        Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
+        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    }
     p.erase(addr);
 }
 
@@ -1027,7 +1030,7 @@ static void Step() {
 
 /// Tell the CPU if we hit a memory breakpoint.
 bool IsMemoryBreak() {
-    if (IsConnected()) {
+    if (!IsConnected()) {
         return false;
     }
 
@@ -1057,9 +1060,12 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
     breakpoint.addr = addr;
     breakpoint.len = len;
     Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
+
     static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
-    Memory::WriteBlock(addr, btrap.data(), btrap.size());
-    Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    if (type == BreakpointType::Execute) {
+        Memory::WriteBlock(addr, btrap.data(), btrap.size());
+        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    }
     p.insert({addr, breakpoint});
 
     LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..fae54bcc7 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
-#include "core/hle/kernel/errors.h"
-#include "core/memory.h"
 
 namespace IPC {
 
@@ -39,10 +39,10 @@ struct CommandHeader {
     union {
         u32_le raw_low;
         BitField<0, 16, CommandType> type;
-        BitField<16, 4, u32_le> num_buf_x_descriptors;
-        BitField<20, 4, u32_le> num_buf_a_descriptors;
-        BitField<24, 4, u32_le> num_buf_b_descriptors;
-        BitField<28, 4, u32_le> num_buf_w_descriptors;
+        BitField<16, 4, u32> num_buf_x_descriptors;
+        BitField<20, 4, u32> num_buf_a_descriptors;
+        BitField<24, 4, u32> num_buf_b_descriptors;
+        BitField<28, 4, u32> num_buf_w_descriptors;
     };
 
     enum class BufferDescriptorCFlag : u32 {
@@ -53,28 +53,28 @@ struct CommandHeader {
 
     union {
         u32_le raw_high;
-        BitField<0, 10, u32_le> data_size;
+        BitField<0, 10, u32> data_size;
         BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags;
-        BitField<31, 1, u32_le> enable_handle_descriptor;
+        BitField<31, 1, u32> enable_handle_descriptor;
     };
 };
 static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect");
 
 union HandleDescriptorHeader {
     u32_le raw_high;
-    BitField<0, 1, u32_le> send_current_pid;
-    BitField<1, 4, u32_le> num_handles_to_copy;
-    BitField<5, 4, u32_le> num_handles_to_move;
+    BitField<0, 1, u32> send_current_pid;
+    BitField<1, 4, u32> num_handles_to_copy;
+    BitField<5, 4, u32> num_handles_to_move;
 };
 static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect");
 
 struct BufferDescriptorX {
     union {
-        BitField<0, 6, u32_le> counter_bits_0_5;
-        BitField<6, 3, u32_le> address_bits_36_38;
-        BitField<9, 3, u32_le> counter_bits_9_11;
-        BitField<12, 4, u32_le> address_bits_32_35;
-        BitField<16, 16, u32_le> size;
+        BitField<0, 6, u32> counter_bits_0_5;
+        BitField<6, 3, u32> address_bits_36_38;
+        BitField<9, 3, u32> counter_bits_9_11;
+        BitField<12, 4, u32> address_bits_32_35;
+        BitField<16, 16, u32> size;
     };
 
     u32_le address_bits_0_31;
@@ -103,10 +103,10 @@ struct BufferDescriptorABW {
     u32_le address_bits_0_31;
 
     union {
-        BitField<0, 2, u32_le> flags;
-        BitField<2, 3, u32_le> address_bits_36_38;
-        BitField<24, 4, u32_le> size_bits_32_35;
-        BitField<28, 4, u32_le> address_bits_32_35;
+        BitField<0, 2, u32> flags;
+        BitField<2, 3, u32> address_bits_36_38;
+        BitField<24, 4, u32> size_bits_32_35;
+        BitField<28, 4, u32> address_bits_32_35;
     };
 
     VAddr Address() const {
@@ -128,8 +128,8 @@ struct BufferDescriptorC {
     u32_le address_bits_0_31;
 
     union {
-        BitField<0, 16, u32_le> address_bits_32_47;
-        BitField<16, 16, u32_le> size;
+        BitField<0, 16, u32> address_bits_32_47;
+        BitField<16, 16, u32> size;
     };
 
     VAddr Address() const {
@@ -167,8 +167,8 @@ struct DomainMessageHeader {
         struct {
             union {
                 BitField<0, 8, CommandType> command;
-                BitField<8, 8, u32_le> input_object_count;
-                BitField<16, 16, u32_le> size;
+                BitField<8, 8, u32> input_object_count;
+                BitField<16, 16, u32> size;
             };
             u32_le object_id;
             INSERT_PADDING_WORDS(2);
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 0d6c85aed..ac0e1d796 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,9 +19,12 @@
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/result.h"
 
 namespace IPC {
 
+constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
+
 class RequestHelperBase {
 protected:
     Kernel::HLERequestContext* context = nullptr;
@@ -136,10 +139,8 @@ public:
             context->AddDomainObject(std::move(iface));
         } else {
             auto& kernel = Core::System::GetInstance().Kernel();
-            auto sessions =
+            auto [server, client] =
                 Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
-            auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
-            auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
             iface->ClientConnected(server);
             context->AddMoveObject(std::move(client));
         }
@@ -217,6 +218,11 @@ private:
 /// Push ///
 
 template <>
+inline void ResponseBuilder::Push(s32 value) {
+    cmdbuf[index++] = static_cast<u32>(value);
+}
+
+template <>
 inline void ResponseBuilder::Push(u32 value) {
     cmdbuf[index++] = value;
 }
@@ -235,6 +241,22 @@ inline void ResponseBuilder::Push(ResultCode value) {
 }
 
 template <>
+inline void ResponseBuilder::Push(s8 value) {
+    PushRaw(value);
+}
+
+template <>
+inline void ResponseBuilder::Push(s16 value) {
+    PushRaw(value);
+}
+
+template <>
+inline void ResponseBuilder::Push(s64 value) {
+    Push(static_cast<u32>(value));
+    Push(static_cast<u32>(value >> 32));
+}
+
+template <>
 inline void ResponseBuilder::Push(u8 value) {
     PushRaw(value);
 }
@@ -251,6 +273,20 @@ inline void ResponseBuilder::Push(u64 value) {
 }
 
 template <>
+inline void ResponseBuilder::Push(float value) {
+    u32 raw_bits; // receives the float's bit pattern
+    std::memcpy(&raw_bits, &value, sizeof(raw_bits));
+    Push(raw_bits);
+}
+
+template <>
+inline void ResponseBuilder::Push(double value) {
+    u64 raw_bits; // receives the double's bit pattern
+    std::memcpy(&raw_bits, &value, sizeof(raw_bits));
+    Push(raw_bits);
+}
+
+template <>
 inline void ResponseBuilder::Push(bool value) {
     Push(static_cast<u8>(value));
 }
@@ -329,7 +365,7 @@ public:
     template <class T>
     std::shared_ptr<T> PopIpcInterface() {
         ASSERT(context->Session()->IsDomain());
-        ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
+        ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
         return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
     }
 };
@@ -341,6 +377,11 @@ inline u32 RequestParser::Pop() {
     return cmdbuf[index++];
 }
 
+template <>
+inline s32 RequestParser::Pop() {
+    return static_cast<s32>(Pop<u32>());
+}
+
 template <typename T>
 void RequestParser::PopRaw(T& value) {
     std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -372,11 +413,37 @@ inline u64 RequestParser::Pop() {
 }
 
 template <>
+inline s8 RequestParser::Pop() {
+    return static_cast<s8>(Pop<u8>());
+}
+
+template <>
+inline s16 RequestParser::Pop() {
+    return static_cast<s16>(Pop<u16>());
+}
+
+template <>
 inline s64 RequestParser::Pop() {
     return static_cast<s64>(Pop<u64>());
 }
 
 template <>
+inline float RequestParser::Pop() {
+    const u32 raw_bits = Pop<u32>(); // one 32-bit word from the command buffer
+    float result;
+    std::memcpy(&result, &raw_bits, sizeof(float)); // bit-copy instead of a value conversion
+    return result;
+}
+
+template <>
+inline double RequestParser::Pop() {
+    const u64 value = Pop<u64>(); // 64-bit pattern of the encoded double
+    double real; // must be double: a float here would copy only 4 of the 8 bytes
+    std::memcpy(&real, &value, sizeof(real));
+    return real;
+}
+
+template <>
 inline bool RequestParser::Pop() {
     return Pop<u8>() != 0;
 }
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 57157beb4..c8842410b 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "core/core.h"
 #include "core/core_cpu.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
@@ -18,58 +19,15 @@
 #include "core/memory.h"
 
 namespace Kernel {
-namespace AddressArbiter {
-
-// Performs actual address waiting logic.
-static ResultCode WaitForAddress(VAddr address, s64 timeout) {
-    SharedPtr<Thread> current_thread = GetCurrentThread();
-    current_thread->SetArbiterWaitAddress(address);
-    current_thread->SetStatus(ThreadStatus::WaitArb);
-    current_thread->InvalidateWakeupCallback();
-
-    current_thread->WakeAfterDelay(timeout);
-
-    Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
-    return RESULT_TIMEOUT;
-}
-
-// Gets the threads waiting on an address.
-static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
-    const auto RetrieveWaitingThreads = [](std::size_t core_index,
-                                           std::vector<SharedPtr<Thread>>& waiting_threads,
-                                           VAddr arb_addr) {
-        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
-        const auto& thread_list = scheduler.GetThreadList();
-
-        for (const auto& thread : thread_list) {
-            if (thread->GetArbiterWaitAddress() == arb_addr)
-                waiting_threads.push_back(thread);
-        }
-    };
-
-    // Retrieve all threads that are waiting for this address.
-    std::vector<SharedPtr<Thread>> threads;
-    RetrieveWaitingThreads(0, threads, address);
-    RetrieveWaitingThreads(1, threads, address);
-    RetrieveWaitingThreads(2, threads, address);
-    RetrieveWaitingThreads(3, threads, address);
-
-    // Sort them by priority, such that the highest priority ones come first.
-    std::sort(threads.begin(), threads.end(),
-              [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
-                  return lhs->GetPriority() < rhs->GetPriority();
-              });
-
-    return threads;
-}
-
+namespace {
 // Wake up num_to_wake (or all) threads in a vector.
-static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
+void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
     // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
     // them all.
     std::size_t last = waiting_threads.size();
-    if (num_to_wake > 0)
-        last = num_to_wake;
+    if (num_to_wake > 0) {
+        last = std::min(last, static_cast<std::size_t>(num_to_wake));
+    }
 
     // Signal the waiting threads.
     for (std::size_t i = 0; i < last; i++) {
@@ -79,88 +37,114 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
         waiting_threads[i]->ResumeFromWait();
     }
 }
+} // Anonymous namespace
+
+AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
+AddressArbiter::~AddressArbiter() = default;
+
+ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
+                                           s32 num_to_wake) {
+    switch (type) { // Dispatch to the private handler matching the signal type.
+    case SignalType::Signal:
+        return SignalToAddressOnly(address, num_to_wake); // 'value' is not used for a plain signal
+    case SignalType::IncrementAndSignalIfEqual:
+        return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
+    case SignalType::ModifyByWaitingCountAndSignalIfEqual:
+        return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
+    default: // Any value outside the three enumerators above.
+        return ERR_INVALID_ENUM_VALUE;
+    }
+}
 
-// Signals an address being waited on.
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
-
+ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
     WakeThreads(waiting_threads, num_to_wake);
     return RESULT_SUCCESS;
 }
 
-// Signals an address being waited on and increments its value if equal to the value argument.
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
+ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                              s32 num_to_wake) {
     // Ensure that we can write to the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(value + 1));
-    } else {
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
         return ERR_INVALID_STATE;
     }
 
-    return SignalToAddress(address, num_to_wake);
+    Memory::Write32(address, static_cast<u32>(value + 1));
+    return SignalToAddressOnly(address, num_to_wake);
 }
 
-// Signals an address being waited on and modifies its value based on waiting thread count if equal
-// to the value argument.
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                         s32 num_to_wake) {
+ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                                         s32 num_to_wake) {
     // Ensure that we can write to the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
     }
 
     // Get threads waiting on the address.
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
 
     // Determine the modified value depending on the waiting count.
     s32 updated_value;
     if (waiting_threads.empty()) {
-        updated_value = value - 1;
-    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
         updated_value = value + 1;
+    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+        updated_value = value - 1;
     } else {
         updated_value = value;
     }
 
-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(updated_value));
-    } else {
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
         return ERR_INVALID_STATE;
     }
 
+    Memory::Write32(address, static_cast<u32>(updated_value));
     WakeThreads(waiting_threads, num_to_wake);
     return RESULT_SUCCESS;
 }
 
-// Waits on an address if the value passed is less than the argument value, optionally decrementing.
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
+ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
+                                          s64 timeout_ns) {
+    switch (type) { // Dispatch to the private handler matching the arbitration type.
+    case ArbitrationType::WaitIfLessThan:
+        return WaitForAddressIfLessThan(address, value, timeout_ns, false); // should_decrement off
+    case ArbitrationType::DecrementAndWaitIfLessThan:
+        return WaitForAddressIfLessThan(address, value, timeout_ns, true); // should_decrement on
+    case ArbitrationType::WaitIfEqual:
+        return WaitForAddressIfEqual(address, value, timeout_ns);
+    default: // Any value outside the three enumerators above.
+        return ERR_INVALID_ENUM_VALUE;
+    }
+}
+
+ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                                    bool should_decrement) {
     // Ensure that we can read the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    s32 cur_value = static_cast<s32>(Memory::Read32(address));
-    if (cur_value < value) {
-        if (should_decrement) {
-            Memory::Write32(address, static_cast<u32>(cur_value - 1));
-        }
-    } else {
+    const s32 cur_value = static_cast<s32>(Memory::Read32(address));
+    if (cur_value >= value) {
         return ERR_INVALID_STATE;
     }
+
+    if (should_decrement) {
+        Memory::Write32(address, static_cast<u32>(cur_value - 1));
+    }
+
     // Short-circuit without rescheduling, if timeout is zero.
     if (timeout == 0) {
         return RESULT_TIMEOUT;
     }
 
-    return WaitForAddress(address, timeout);
+    return WaitForAddressImpl(address, timeout);
 }
 
-// Waits on an address if the value passed is equal to the argument value.
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
+ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
     // Ensure that we can read the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
@@ -174,7 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
         return RESULT_TIMEOUT;
     }
 
-    return WaitForAddress(address, timeout);
+    return WaitForAddressImpl(address, timeout);
+}
+
+ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
+    SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
+    current_thread->SetArbiterWaitAddress(address); // matched by GetThreadsWaitingOnAddress()
+    current_thread->SetStatus(ThreadStatus::WaitArb);
+    current_thread->InvalidateWakeupCallback();
+
+    current_thread->WakeAfterDelay(timeout); // timeout is passed through unchanged
+
+    system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
+    return RESULT_TIMEOUT; // NOTE(review): unconditional; presumably replaced on signal — confirm
+}
+
+std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
+    const auto RetrieveWaitingThreads = [this](std::size_t core_index,
+                                               std::vector<SharedPtr<Thread>>& waiting_threads,
+                                               VAddr arb_addr) {
+        const auto& scheduler = system.Scheduler(core_index);
+        const auto& thread_list = scheduler.GetThreadList();
+
+        for (const auto& thread : thread_list) {
+            if (thread->GetArbiterWaitAddress() == arb_addr) {
+                waiting_threads.push_back(thread); // appended, so results accumulate across cores
+            }
+        }
+    };
+
+    // Retrieve all threads that are waiting for this address.
+    std::vector<SharedPtr<Thread>> threads;
+    RetrieveWaitingThreads(0, threads, address); // NOTE(review): core indices 0-3 are hard-coded
+    RetrieveWaitingThreads(1, threads, address);
+    RetrieveWaitingThreads(2, threads, address);
+    RetrieveWaitingThreads(3, threads, address);
+
+    // Sort them by priority, such that the highest priority ones come first.
+    std::sort(threads.begin(), threads.end(),
+              [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
+                  return lhs->GetPriority() < rhs->GetPriority(); // ascending numeric value
+              }); // std::sort is not stable: equal priorities end up in unspecified order
+
+    return threads;
 }
-} // namespace AddressArbiter
 } // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index e3657b8e9..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,31 +4,77 @@
 
 #pragma once
 
+#include <vector>
+
 #include "common/common_types.h"
+#include "core/hle/kernel/object.h"
 
 union ResultCode;
 
+namespace Core {
+class System;
+}
+
 namespace Kernel {
 
-namespace AddressArbiter {
-enum class ArbitrationType {
-    WaitIfLessThan = 0,
-    DecrementAndWaitIfLessThan = 1,
-    WaitIfEqual = 2,
-};
+class Thread;
 
-enum class SignalType {
-    Signal = 0,
-    IncrementAndSignalIfEqual = 1,
-    ModifyByWaitingCountAndSignalIfEqual = 2,
-};
+class AddressArbiter {
+public:
+    enum class ArbitrationType {
+        WaitIfLessThan = 0,
+        DecrementAndWaitIfLessThan = 1,
+        WaitIfEqual = 2,
+    };
+
+    enum class SignalType {
+        Signal = 0,
+        IncrementAndSignalIfEqual = 1,
+        ModifyByWaitingCountAndSignalIfEqual = 2,
+    };
+
+    explicit AddressArbiter(Core::System& system);
+    ~AddressArbiter();
+
+    AddressArbiter(const AddressArbiter&) = delete;
+    AddressArbiter& operator=(const AddressArbiter&) = delete;
+
+    AddressArbiter(AddressArbiter&&) = default;
+    AddressArbiter& operator=(AddressArbiter&&) = delete;
+
+    /// Signals an address being waited on with a particular signaling type.
+    ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
 
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+    /// Waits on an address with a particular arbitration type.
+    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
 
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
-} // namespace AddressArbiter
+private:
+    /// Signals an address being waited on.
+    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
+
+    /// Signals an address being waited on and increments its value if equal to the value argument.
+    ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+
+    /// Signals an address being waited on and modifies its value based on waiting thread count if
+    /// equal to the value argument.
+    ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                             s32 num_to_wake);
+
+    /// Waits on an address if the value passed is less than the argument value,
+    /// optionally decrementing.
+    ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                        bool should_decrement);
+
+    /// Waits on an address if the value passed is equal to the argument value.
+    ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
+
+    // Waits on the given address with a timeout in nanoseconds
+    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
+
+    // Gets the threads waiting on an address.
+    std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+
+    Core::System& system;
+};
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index d4c91d529..744b1697d 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <tuple>
-
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/errors.h"
@@ -31,17 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
     active_sessions++;
 
     // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
-    auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
+    auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
 
-    if (server_port->hle_handler)
-        server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
-    else
-        server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions));
+    if (server_port->HasHLEHandler()) {
+        server_port->GetHLEHandler()->ClientConnected(server);
+    } else {
+        server_port->AppendPendingSession(server);
+    }
 
     // Wake the threads waiting on the ServerPort
     server_port->WakeupAllWaitingThreads();
 
-    return MakeResult(std::get<SharedPtr<ClientSession>>(sessions));
+    return MakeResult(client);
 }
 
 void ClientPort::ConnectionClosed() {
diff --git a/src/core/hle/kernel/client_port.h b/src/core/hle/kernel/client_port.h
index 6cd607206..4921ad4f0 100644
--- a/src/core/hle/kernel/client_port.h
+++ b/src/core/hle/kernel/client_port.h
@@ -25,7 +25,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ClientPort;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ClientPort;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
     // This destructor will be called automatically when the last ClientSession handle is closed by
     // the emulated application.
 
-    // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
+    // A local reference to the ServerSession is necessary to guarantee it
     // will be kept alive until after ClientDisconnected() returns.
     SharedPtr<ServerSession> server = parent->server;
     if (server) {
-        std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
-        if (hle_handler)
-            hle_handler->ClientDisconnected(server);
-
-        // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
-        // their WaitSynchronization result to 0xC920181A.
-
-        // Clean up the list of client threads with pending requests, they are unneeded now that the
-        // client endpoint is closed.
-        server->pending_requesting_threads.clear();
-        server->currently_handling = nullptr;
+        server->ClientDisconnected();
     }
 
     parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..09cdff588 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -29,21 +29,22 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ClientSession;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ClientSession;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
 
     ResultCode SendSyncRequest(SharedPtr<Thread> thread);
 
-    std::string name; ///< Name of client port (optional)
+private:
+    explicit ClientSession(KernelCore& kernel);
+    ~ClientSession() override;
 
     /// The parent session, which links to the server endpoint.
     std::shared_ptr<Session> parent;
 
-private:
-    explicit ClientSession(KernelCore& kernel);
-    ~ClientSession() override;
+    /// Name of the client session (optional)
+    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.cpp b/src/core/hle/kernel/code_set.cpp
new file mode 100644
index 000000000..1f434e9af
--- /dev/null
+++ b/src/core/hle/kernel/code_set.cpp
@@ -0,0 +1,12 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/code_set.h"
+
+namespace Kernel {
+
+CodeSet::CodeSet() = default;
+CodeSet::~CodeSet() = default;
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
new file mode 100644
index 000000000..879957dcb
--- /dev/null
+++ b/src/core/hle/kernel/code_set.h
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Kernel {
+
+/**
+ * Represents executable data that may be loaded into a kernel process.
+ *
+ * A code set consists of three basic segments:
+ *   - A code (AKA text) segment,
+ *   - A read-only data segment (rodata)
+ *   - A data segment
+ *
+ * The code segment is the portion of the object file that contains
+ * executable instructions.
+ *
+ * The read-only data segment is the portion of the object file that
+ * contains (as one would expect) read-only data, such as fixed constant
+ * values and data structures.
+ *
+ * The data segment is similar to the read-only data segment -- it contains
+ * variables and data structures that have predefined values; however,
+ * entities within this segment can be modified.
+ */
+struct CodeSet final {
+    /// A single segment within a code set.
+    struct Segment final {
+        /// The byte offset that this segment is located at.
+        std::size_t offset = 0;
+
+        /// The address to map this segment to.
+        VAddr addr = 0;
+
+        /// The size of this segment in bytes.
+        u32 size = 0;
+    };
+
+    explicit CodeSet();
+    ~CodeSet();
+
+    CodeSet(const CodeSet&) = delete;
+    CodeSet& operator=(const CodeSet&) = delete;
+
+    CodeSet(CodeSet&&) = default;
+    CodeSet& operator=(CodeSet&&) = default;
+
+    Segment& CodeSegment() {
+        return segments[0];
+    }
+
+    const Segment& CodeSegment() const {
+        return segments[0];
+    }
+
+    Segment& RODataSegment() {
+        return segments[1];
+    }
+
+    const Segment& RODataSegment() const {
+        return segments[1];
+    }
+
+    Segment& DataSegment() {
+        return segments[2];
+    }
+
+    const Segment& DataSegment() const {
+        return segments[2];
+    }
+
+    /// The overall data that backs this code set.
+    std::vector<u8> memory;
+
+    /// The segments that comprise this code set.
+    std::array<Segment, 3> segments;
+
+    /// The entry point address for this code set.
+    VAddr entrypoint = 0;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
+constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
 namespace Kernel {
 namespace {
 constexpr u16 GetSlot(Handle handle) {
-    return handle >> 15;
+    return static_cast<u16>(handle >> 15);
 }
 
 constexpr u16 GetGeneration(Handle handle) {
-    return handle & 0x7FFF;
+    return static_cast<u16>(handle & 0x7FFF);
 }
 } // Anonymous namespace
 
 HandleTable::HandleTable() {
-    next_generation = 1;
     Clear();
 }
 
 HandleTable::~HandleTable() = default;
 
+ResultCode HandleTable::SetSize(s32 handle_table_size) {
+    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
+        return ERR_OUT_OF_MEMORY;
+    }
+
+    // A value of zero indicates to use the maximum allowable size for the
+    // handle table in the actual kernel, so we ignore the given value in that
+    // case, since we assume this by default unless this function is called.
+    // Negative values never get here: the unsigned comparison above rejects them.
+    if (handle_table_size > 0) {
+        table_size = static_cast<u16>(handle_table_size);
+    }
+
+    return RESULT_SUCCESS;
+}
+
 ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
     DEBUG_ASSERT(obj != nullptr);
 
-    u16 slot = next_free_slot;
-    if (slot >= generations.size()) {
+    const u16 slot = next_free_slot;
+    if (slot >= table_size) {
         LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
         return ERR_HANDLE_TABLE_FULL;
     }
     next_free_slot = generations[slot];
 
-    u16 generation = next_generation++;
+    const u16 generation = next_generation++;
 
     // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
     // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 }
 
 ResultCode HandleTable::Close(Handle handle) {
-    if (!IsValid(handle))
+    if (!IsValid(handle)) {
         return ERR_INVALID_HANDLE;
+    }
 
-    u16 slot = GetSlot(handle);
+    const u16 slot = GetSlot(handle);
 
     objects[slot] = nullptr;
 
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
 }
 
 bool HandleTable::IsValid(Handle handle) const {
-    std::size_t slot = GetSlot(handle);
-    u16 generation = GetGeneration(handle);
+    const std::size_t slot = GetSlot(handle);
+    const u16 generation = GetGeneration(handle);
 
-    return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
+    return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
 }
 
 SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
 }
 
 void HandleTable::Clear() {
-    for (u16 i = 0; i < MAX_COUNT; ++i) {
+    for (u16 i = 0; i < table_size; ++i) {
         generations[i] = i + 1;
         objects[i] = nullptr;
     }
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
     ~HandleTable();
 
     /**
+     * Sets the number of handles that may be in use at one time
+     * for this handle table.
+     *
+     * @param handle_table_size The desired size to limit the handle table to.
+     *
+     * @returns an error code indicating if initialization was successful.
+     *          If initialization was not successful, then ERR_OUT_OF_MEMORY
+     *          will be returned.
+     *
+     * @pre handle_table_size must be within the range [0, 1024]
+     */
+    ResultCode SetSize(s32 handle_table_size);
+
+    /**
      * Allocates a handle for the given object.
      * @return The created Handle or one of the following errors:
      *           - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
     std::array<u16, MAX_COUNT> generations;
 
     /**
+     * The limited size of the handle table. This can be specified by process
+     * capabilities in order to restrict the overall number of handles that
+     * can be created in a process instance.
+     */
+    u16 table_size = static_cast<u16>(MAX_COUNT);
+
+    /**
      * Global counter of the number of created handles. Stored in `generations` when a handle is
      * created, and wraps around to 1 when it hits 0x8000.
      */
-    u16 next_generation;
+    u16 next_generation = 1;
 
     /// Head of the free slots linked list.
-    u16 next_free_slot;
+    u16 next_free_slot = 0;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
 void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
                                            bool incoming) {
     IPC::RequestParser rp(src_cmdbuf);
-    command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
+    command_header = rp.PopRaw<IPC::CommandHeader>();
 
     if (command_header->type == IPC::CommandType::Close) {
         // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
 
     // If handle descriptor is present, add size of it
     if (command_header->enable_handle_descriptor) {
-        handle_descriptor_header =
-            std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
+        handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
         if (handle_descriptor_header->send_current_pid) {
             rp.Skip(2, false);
         }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
         // If this is an incoming message, only CommandType "Request" has a domain header
         // All outgoing domain messages have the domain header, if only incoming has it
         if (incoming || domain_message_header) {
-            domain_message_header =
-                std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
+            domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
         } else {
-            if (Session()->IsDomain())
+            if (Session()->IsDomain()) {
                 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
+            }
         }
     }
 
-    data_payload_header =
-        std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
+    data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
 
     data_payload_offset = rp.GetCurrentOffset();
 
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
         // Write the domain objects to the command buffer, these go after the raw untranslated data.
         // TODO(Subv): This completely ignores C buffers.
         std::size_t domain_offset = size - domain_message_header->num_objects;
-        auto& request_handlers = server_session->domain_request_handlers;
 
-        for (auto& object : domain_objects) {
-            request_handlers.emplace_back(object);
-            dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
+        for (const auto& object : domain_objects) {
+            server_session->AppendDomainRequestHandler(object);
+            dst_cmdbuf[domain_offset++] =
+                static_cast<u32_le>(server_session->NumDomainRequestHandlers());
         }
     }
 
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index cb1c5aff3..2bdd9f02c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -15,6 +16,8 @@
 #include "core/hle/ipc.h"
 #include "core/hle/kernel/object.h"
 
+union ResultCode;
+
 namespace Service {
 class ServiceFrameworkBase;
 }
@@ -166,12 +169,12 @@ public:
         return buffer_c_desciptors;
     }
 
-    const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
-        return domain_message_header.get();
+    const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
+        return domain_message_header.value();
     }
 
     bool HasDomainMessageHeader() const {
-        return domain_message_header != nullptr;
+        return domain_message_header.has_value();
     }
 
     /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
 
     template <typename T>
     SharedPtr<T> GetCopyObject(std::size_t index) {
-        ASSERT(index < copy_objects.size());
-        return DynamicObjectCast<T>(copy_objects[index]);
+        return DynamicObjectCast<T>(copy_objects.at(index));
     }
 
     template <typename T>
     SharedPtr<T> GetMoveObject(std::size_t index) {
-        ASSERT(index < move_objects.size());
-        return DynamicObjectCast<T>(move_objects[index]);
+        return DynamicObjectCast<T>(move_objects.at(index));
     }
 
     void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
 
     template <typename T>
     std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
-        return std::static_pointer_cast<T>(domain_request_handlers[index]);
+        return std::static_pointer_cast<T>(domain_request_handlers.at(index));
     }
 
     void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
     boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
     boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
 
-    std::shared_ptr<IPC::CommandHeader> command_header;
-    std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header;
-    std::shared_ptr<IPC::DataPayloadHeader> data_payload_header;
-    std::shared_ptr<IPC::DomainMessageHeader> domain_message_header;
+    std::optional<IPC::CommandHeader> command_header;
+    std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
+    std::optional<IPC::DataPayloadHeader> data_payload_header;
+    std::optional<IPC::DomainMessageHeader> domain_message_header;
     std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
     std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
     std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 67674cd47..4d58e7c69 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,15 +12,16 @@
 
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/timer.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
@@ -29,12 +30,12 @@ namespace Kernel {
  * @param thread_handle The handle of the thread that's been awoken
  * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
  */
-static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
+static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
     const auto proper_handle = static_cast<Handle>(thread_handle);
     const auto& system = Core::System::GetInstance();
 
     // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
 
     SharedPtr<Thread> thread =
         system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
@@ -62,7 +63,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 
     if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 ||
         thread->GetWaitHandle() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex ||
+               thread->GetStatus() == ThreadStatus::WaitCondVar);
         thread->SetMutexWaitAddress(0);
         thread->SetCondVarWaitAddress(0);
         thread->SetWaitHandle(0);
@@ -86,27 +88,14 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
     }
 }
 
-/// The timer callback event, called when a timer is fired
-static void TimerCallback(u64 timer_handle, int cycles_late) {
-    const auto proper_handle = static_cast<Handle>(timer_handle);
-    const auto& system = Core::System::GetInstance();
-    SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle);
-
-    if (timer == nullptr) {
-        LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle);
-        return;
-    }
-
-    timer->Signal(cycles_late);
-}
-
 struct KernelCore::Impl {
+    explicit Impl(Core::System& system) : system{system} {}
+
     void Initialize(KernelCore& kernel) {
         Shutdown();
 
         InitializeSystemResourceLimit(kernel);
         InitializeThreads();
-        InitializeTimers();
     }
 
     void Shutdown() {
@@ -122,15 +111,12 @@ struct KernelCore::Impl {
         thread_wakeup_callback_handle_table.Clear();
         thread_wakeup_event_type = nullptr;
 
-        timer_callback_handle_table.Clear();
-        timer_callback_event_type = nullptr;
-
         named_ports.clear();
     }
 
     // Creates the default system resource limit
     void InitializeSystemResourceLimit(KernelCore& kernel) {
-        system_resource_limit = ResourceLimit::Create(kernel, "System");
+        system_resource_limit = ResourceLimit::Create(kernel);
 
         // If setting the default system values fails, then something seriously wrong has occurred.
         ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000)
@@ -143,12 +129,7 @@ struct KernelCore::Impl {
 
     void InitializeThreads() {
         thread_wakeup_event_type =
-            CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
-    }
-
-    void InitializeTimers() {
-        timer_callback_handle_table.Clear();
-        timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback);
+            system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
     }
 
     std::atomic<u32> next_object_id{0};
@@ -161,13 +142,7 @@ struct KernelCore::Impl {
 
     SharedPtr<ResourceLimit> system_resource_limit;
 
-    /// The event type of the generic timer callback event
-    CoreTiming::EventType* timer_callback_event_type = nullptr;
-    // TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future,
-    // allowing us to simply use a pool index or similar.
-    Kernel::HandleTable timer_callback_handle_table;
-
-    CoreTiming::EventType* thread_wakeup_event_type = nullptr;
+    Core::Timing::EventType* thread_wakeup_event_type = nullptr;
     // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
     // allowing us to simply use a pool index or similar.
     Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -175,9 +150,12 @@ struct KernelCore::Impl {
     /// Map of named ports managed by the kernel, which can be retrieved using
     /// the ConnectToPort SVC.
     NamedPortTable named_ports;
+
+    // System context
+    Core::System& system;
 };
 
-KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
+KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
 KernelCore::~KernelCore() {
     Shutdown();
 }
@@ -198,16 +176,13 @@ SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle
     return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle);
 }
 
-SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const {
-    return impl->timer_callback_handle_table.Get<Timer>(handle);
-}
-
 void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
     impl->process_list.push_back(std::move(process));
 }
 
 void KernelCore::MakeCurrentProcess(Process* process) {
     impl->current_process = process;
+    Memory::SetCurrentPageTable(&process->VMManager().page_table);
 }
 
 Process* KernelCore::CurrentProcess() {
@@ -218,6 +193,10 @@ const Process* KernelCore::CurrentProcess() const {
     return impl->current_process;
 }
 
+const std::vector<SharedPtr<Process>>& KernelCore::GetProcessList() const {
+    return impl->process_list;
+}
+
 void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
     impl->named_ports.emplace(std::move(name), std::move(port));
 }
@@ -247,18 +226,10 @@ u64 KernelCore::CreateNewProcessID() {
     return impl->next_process_id++;
 }
 
-ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) {
-    return impl->timer_callback_handle_table.Create(timer);
-}
-
-CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
+Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
     return impl->thread_wakeup_event_type;
 }
 
-CoreTiming::EventType* KernelCore::TimerCallbackEventType() const {
-    return impl->timer_callback_event_type;
-}
-
 Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() {
     return impl->thread_wakeup_callback_handle_table;
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 58c9d108b..6b8738599 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -8,21 +8,23 @@
 #include <unordered_map>
 #include "core/hle/kernel/object.h"
 
-template <typename T>
-class ResultVal;
+namespace Core {
+class System;
+}
 
-namespace CoreTiming {
+namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace Kernel {
 
+class AddressArbiter;
 class ClientPort;
 class HandleTable;
 class Process;
 class ResourceLimit;
 class Thread;
-class Timer;
 
 /// Represents a single instance of the kernel.
 class KernelCore {
@@ -30,7 +32,14 @@ private:
     using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
 
 public:
-    KernelCore();
+    /// Constructs an instance of the kernel using the given System
+    /// instance as a context for any necessary system-related state,
+    /// such as threads, CPU core state, etc.
+    ///
+    /// @post After execution of the constructor, the provided System
+    ///       object *must* outlive the kernel instance itself.
+    ///
+    explicit KernelCore(Core::System& system);
     ~KernelCore();
 
     KernelCore(const KernelCore&) = delete;
@@ -51,9 +60,6 @@ public:
     /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
     SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const;
 
-    /// Retrieves a shared pointer to a Timer instance within the timer callback handle table.
-    SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const;
-
     /// Adds the given shared pointer to an internal list of active processes.
     void AppendNewProcess(SharedPtr<Process> process);
 
@@ -66,6 +72,9 @@ public:
     /// Retrieves a const pointer to the current process.
     const Process* CurrentProcess() const;
 
+    /// Retrieves the list of processes.
+    const std::vector<SharedPtr<Process>>& GetProcessList() const;
+
     /// Adds a port to the named port table
     void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
 
@@ -82,7 +91,6 @@ private:
     friend class Object;
     friend class Process;
     friend class Thread;
-    friend class Timer;
 
     /// Creates a new object ID, incrementing the internal object ID counter.
     u32 CreateNewObjectID();
@@ -93,14 +101,8 @@ private:
     /// Creates a new thread ID, incrementing the internal thread ID counter.
     u64 CreateNewThreadID();
 
-    /// Creates a timer callback handle for the given timer.
-    ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer);
-
     /// Retrieves the event type used for thread wakeup callbacks.
-    CoreTiming::EventType* ThreadWakeupCallbackEventType() const;
-
-    /// Retrieves the event type used for timer callbacks.
-    CoreTiming::EventType* TimerCallbackEventType() const;
+    Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
 
     /// Provides a reference to the thread wakeup callback handle table.
     Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 0743670ad..98e87313b 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <map>
 #include <utility>
 #include <vector>
 
@@ -10,8 +9,11 @@
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
@@ -57,41 +59,47 @@ static void TransferMutexOwnership(VAddr mutex_addr, SharedPtr<Thread> current_t
     }
 }
 
-ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle holding_thread_handle,
+Mutex::Mutex(Core::System& system) : system{system} {}
+Mutex::~Mutex() = default;
+
+ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
                              Handle requesting_thread_handle) {
     // The mutex address must be 4-byte aligned
     if ((address % sizeof(u32)) != 0) {
         return ERR_INVALID_ADDRESS;
     }
 
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    Thread* const current_thread = system.CurrentScheduler().GetCurrentThread();
     SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
     SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
 
     // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
     // thread.
-    ASSERT(requesting_thread == GetCurrentThread());
+    ASSERT(requesting_thread == current_thread);
 
-    u32 addr_value = Memory::Read32(address);
+    const u32 addr_value = Memory::Read32(address);
 
     // If the mutex isn't being held, just return success.
     if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
         return RESULT_SUCCESS;
     }
 
-    if (holding_thread == nullptr)
+    if (holding_thread == nullptr) {
         return ERR_INVALID_HANDLE;
+    }
 
     // Wait until the mutex is released
-    GetCurrentThread()->SetMutexWaitAddress(address);
-    GetCurrentThread()->SetWaitHandle(requesting_thread_handle);
+    current_thread->SetMutexWaitAddress(address);
+    current_thread->SetWaitHandle(requesting_thread_handle);
 
-    GetCurrentThread()->SetStatus(ThreadStatus::WaitMutex);
-    GetCurrentThread()->InvalidateWakeupCallback();
+    current_thread->SetStatus(ThreadStatus::WaitMutex);
+    current_thread->InvalidateWakeupCallback();
 
     // Update the lock holder thread's priority to prevent priority inversion.
-    holding_thread->AddMutexWaiter(GetCurrentThread());
+    holding_thread->AddMutexWaiter(current_thread);
 
-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();
 
     return RESULT_SUCCESS;
 }
@@ -102,7 +110,8 @@ ResultCode Mutex::Release(VAddr address) {
         return ERR_INVALID_ADDRESS;
     }
 
-    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
 
     // There are no more threads waiting for the mutex, release it completely.
     if (thread == nullptr) {
@@ -111,7 +120,7 @@ ResultCode Mutex::Release(VAddr address) {
     }
 
     // Transfer the ownership of the mutex from the previous owner to the new one.
-    TransferMutexOwnership(address, GetCurrentThread(), thread);
+    TransferMutexOwnership(address, current_thread, thread);
 
     u32 mutex_value = thread->GetWaitHandle();
 
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index 81e62d497..b904de2e8 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -5,32 +5,34 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "core/hle/kernel/object.h"
 
 union ResultCode;
 
-namespace Kernel {
+namespace Core {
+class System;
+}
 
-class HandleTable;
-class Thread;
+namespace Kernel {
 
 class Mutex final {
 public:
+    explicit Mutex(Core::System& system);
+    ~Mutex();
+
     /// Flag that indicates that a mutex still has threads waiting for it.
     static constexpr u32 MutexHasWaitersFlag = 0x40000000;
     /// Mask of the bits in a mutex address value that contain the mutex owner.
     static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
 
     /// Attempts to acquire a mutex at the specified address.
-    static ResultCode TryAcquire(HandleTable& handle_table, VAddr address,
-                                 Handle holding_thread_handle, Handle requesting_thread_handle);
+    ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
+                          Handle requesting_thread_handle);
 
     /// Releases the mutex at the specified address.
-    static ResultCode Release(VAddr address);
+    ResultCode Release(VAddr address);
 
 private:
-    Mutex() = default;
-    ~Mutex() = default;
+    Core::System& system;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp
index 806078638..10431e94c 100644
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -16,7 +16,6 @@ bool Object::IsWaitable() const {
     case HandleType::ReadableEvent:
     case HandleType::Thread:
     case HandleType::Process:
-    case HandleType::Timer:
     case HandleType::ServerPort:
     case HandleType::ServerSession:
         return true;
@@ -24,7 +23,7 @@ bool Object::IsWaitable() const {
     case HandleType::Unknown:
     case HandleType::WritableEvent:
     case HandleType::SharedMemory:
-    case HandleType::AddressArbiter:
+    case HandleType::TransferMemory:
     case HandleType::ResourceLimit:
     case HandleType::ClientPort:
     case HandleType::ClientSession:
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 1541b6e3c..332876c27 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -22,10 +22,9 @@ enum class HandleType : u32 {
     WritableEvent,
     ReadableEvent,
     SharedMemory,
+    TransferMemory,
     Thread,
     Process,
-    AddressArbiter,
-    Timer,
     ResourceLimit,
     ClientPort,
     ServerPort,
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..4e94048da 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -5,10 +5,12 @@
 #include <algorithm>
 #include <memory>
 #include <random>
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
@@ -30,9 +32,6 @@ namespace {
  * @param priority The priority to give the main thread
  */
 void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
-    // Setup page table so we can write to memory
-    SetCurrentPageTable(&owner_process.VMManager().page_table);
-
     // Initialize new "main" thread
     const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
     auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
@@ -50,12 +49,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
 }
 } // Anonymous namespace
 
-CodeSet::CodeSet() = default;
-CodeSet::~CodeSet() = default;
-
-SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
-    SharedPtr<Process> process(new Process(kernel));
+SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
+    auto& kernel = system.Kernel();
 
+    SharedPtr<Process> process(new Process(system));
     process->name = std::move(name);
     process->resource_limit = kernel.GetSystemResourceLimit();
     process->status = ProcessStatus::Created;
@@ -76,6 +73,18 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const {
     return resource_limit;
 }
 
+u64 Process::GetTotalPhysicalMemoryUsed() const {
+    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
+}
+
+void Process::RegisterThread(const Thread* thread) {
+    thread_list.push_back(thread);
+}
+
+void Process::UnregisterThread(const Thread* thread) {
+    thread_list.remove(thread);
+}
+
 ResultCode Process::ClearSignalState() {
     if (status == ProcessStatus::Exited) {
         LOG_ERROR(Kernel, "called on a terminated process instance.");
@@ -97,19 +106,30 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     is_64bit_process = metadata.Is64BitProgram();
 
     vm_manager.Reset(metadata.GetAddressSpaceType());
+    // Ensure that the potentially resized page table is seen by CPU backends.
+    Memory::SetCurrentPageTable(&vm_manager.page_table);
 
     const auto& caps = metadata.GetKernelCapabilities();
-    return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
+    const auto capability_init_result =
+        capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
+    if (capability_init_result.IsError()) {
+        return capability_init_result;
+    }
+
+    return handle_table.SetSize(capabilities.GetHandleTableSize());
 }
 
-void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
+void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
     // Allocate and map the main thread stack
     // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
     // of the user address space.
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
     vm_manager
-        .MapMemoryBlock(vm_manager.GetTLSIORegionEndAddress() - stack_size,
-                        std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size,
-                        MemoryState::Stack)
+        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
         .Unwrap();
 
     vm_manager.LogLayout();
@@ -126,7 +146,7 @@ void Process::PrepareForTermination() {
             if (thread->GetOwnerProcess() != this)
                 continue;
 
-            if (thread == GetCurrentThread())
+            if (thread == system.CurrentScheduler().GetCurrentThread())
                 continue;
 
             // TODO(Subv): When are the other running/ready threads terminated?
@@ -138,7 +158,6 @@ void Process::PrepareForTermination() {
         }
     };
 
-    const auto& system = Core::System::GetInstance();
     stop_threads(system.Scheduler(0).GetThreadList());
     stop_threads(system.Scheduler(1).GetThreadList());
     stop_threads(system.Scheduler(2).GetThreadList());
@@ -206,35 +225,38 @@ void Process::FreeTLSSlot(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
-    const auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions,
+    const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
+
+    const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
                                 MemoryState memory_state) {
         const auto vma = vm_manager
-                             .MapMemoryBlock(segment.addr + base_addr, module_.memory,
-                                             segment.offset, segment.size, memory_state)
+                             .MapMemoryBlock(segment.addr + base_addr, memory, segment.offset,
+                                             segment.size, memory_state)
                              .Unwrap();
         vm_manager.Reprotect(vma, permissions);
     };
 
     // Map CodeSet segments
-    MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::CodeStatic);
-    MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeMutable);
-    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
+    MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
+    MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
+    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
+
+    code_memory_size += memory->size(); // module_.memory was moved from above and is now empty.
 
     // Clear instruction cache in CPU JIT
-    Core::System::GetInstance().ArmInterface(0).ClearInstructionCache();
-    Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
-    Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
-    Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
+    system.InvalidateCpuInstructionCaches();
 }
 
-Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {}
-Kernel::Process::~Process() {}
+Process::Process(Core::System& system)
+    : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
+
+Process::~Process() = default;
 
 void Process::Acquire(Thread* thread) {
     ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
 }
 
-bool Process::ShouldWait(Thread* thread) const {
+bool Process::ShouldWait(const Thread* thread) const {
     return !is_signaled;
 }
 
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dcc57ae9f..dda52f4c0 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -7,17 +7,23 @@
 #include <array>
 #include <bitset>
 #include <cstddef>
-#include <memory>
+#include <list>
 #include <string>
 #include <vector>
 #include <boost/container/static_vector.hpp>
 #include "common/common_types.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process_capability.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/hle/kernel/wait_object.h"
 #include "core/hle/result.h"
 
+namespace Core {
+class System;
+}
+
 namespace FileSys {
 class ProgramMetadata;
 }
@@ -28,13 +34,7 @@ class KernelCore;
 class ResourceLimit;
 class Thread;
 
-struct AddressMapping {
-    // Address and size must be page-aligned
-    VAddr address;
-    u64 size;
-    bool read_only;
-    bool unk_flag;
-};
+struct CodeSet;
 
 enum class MemoryRegion : u16 {
     APPLICATION = 1,
@@ -60,46 +60,6 @@ enum class ProcessStatus {
     DebugBreak,
 };
 
-struct CodeSet final {
-    struct Segment {
-        std::size_t offset = 0;
-        VAddr addr = 0;
-        u32 size = 0;
-    };
-
-    explicit CodeSet();
-    ~CodeSet();
-
-    Segment& CodeSegment() {
-        return segments[0];
-    }
-
-    const Segment& CodeSegment() const {
-        return segments[0];
-    }
-
-    Segment& RODataSegment() {
-        return segments[1];
-    }
-
-    const Segment& RODataSegment() const {
-        return segments[1];
-    }
-
-    Segment& DataSegment() {
-        return segments[2];
-    }
-
-    const Segment& DataSegment() const {
-        return segments[2];
-    }
-
-    std::shared_ptr<std::vector<u8>> memory;
-
-    std::array<Segment, 3> segments;
-    VAddr entrypoint = 0;
-};
-
 class Process final : public WaitObject {
 public:
     enum : u64 {
@@ -116,7 +76,7 @@ public:
 
     static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
 
-    static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name);
+    static SharedPtr<Process> Create(Core::System& system, std::string&& name);
 
     std::string GetTypeName() const override {
         return "Process";
@@ -125,7 +85,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::Process;
+    static constexpr HandleType HANDLE_TYPE = HandleType::Process;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
@@ -150,6 +110,26 @@ public:
         return handle_table;
     }
 
+    /// Gets a reference to the process' address arbiter.
+    AddressArbiter& GetAddressArbiter() {
+        return address_arbiter;
+    }
+
+    /// Gets a const reference to the process' address arbiter.
+    const AddressArbiter& GetAddressArbiter() const {
+        return address_arbiter;
+    }
+
+    /// Gets a reference to the process' mutex lock.
+    Mutex& GetMutex() {
+        return mutex;
+    }
+
+    /// Gets a const reference to the process' mutex lock.
+    const Mutex& GetMutex() const {
+        return mutex;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -207,6 +187,22 @@ public:
         return random_entropy.at(index);
     }
 
+    /// Retrieves the total physical memory used by this process in bytes.
+    u64 GetTotalPhysicalMemoryUsed() const;
+
+    /// Gets the list of all threads created with this process as their owner.
+    const std::list<const Thread*>& GetThreadList() const {
+        return thread_list;
+    }
+
+    /// Registers a thread as being created under this process,
+    /// adding it to this process' thread list.
+    void RegisterThread(const Thread* thread);
+
+    /// Unregisters a thread from this process, removing it
+    /// from this process' thread list.
+    void UnregisterThread(const Thread* thread);
+
     /// Clears the signaled state of the process if and only if it's signaled.
     ///
     /// @pre The process must not be already terminated. If this is called on a
@@ -231,7 +227,7 @@ public:
     /**
      * Applies address space changes and launches the process main thread.
      */
-    void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size);
+    void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
 
     /**
      * Prepares a process for termination by stopping all of its threads
@@ -251,11 +247,11 @@ public:
     void FreeTLSSlot(VAddr tls_address);
 
 private:
-    explicit Process(KernelCore& kernel);
+    explicit Process(Core::System& system);
     ~Process() override;
 
     /// Checks if the specified thread should wait until this process is available.
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
 
     /// Acquires/locks this process for the specified thread if it's available.
     void Acquire(Thread* thread) override;
@@ -268,6 +264,12 @@ private:
     /// Memory manager for this process.
     Kernel::VMManager vm_manager;
 
+    /// Size of the main thread's stack in bytes.
+    u64 main_thread_stack_size = 0;
+
+    /// Size of the loaded code memory in bytes.
+    u64 code_memory_size = 0;
+
     /// Current status of the process
     ProcessStatus status;
 
@@ -309,9 +311,24 @@ private:
     /// Per-process handle table for storing created object handles in.
     HandleTable handle_table;
 
+    /// Per-process address arbiter.
+    AddressArbiter address_arbiter;
+
+    /// The per-process mutex lock instance used for handling various
+    /// forms of services, such as lock arbitration, and condition
+    /// variable related facilities.
+    Mutex mutex;
+
     /// Random values for svcGetInfo RandomEntropy
     std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
 
+    /// List of threads that are running with this process as their owner.
+    std::list<const Thread*> thread_list;
+
+    /// System context
+    Core::System& system;
+
+    /// Name of this process
     std::string name;
 };
 
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
     interrupt_capabilities.set();
 
     // Allow using the maximum possible amount of handles
-    handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
+    handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
 
     // Allow all debugging capabilities.
     is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
         return ERR_RESERVED_VALUE;
     }
 
-    handle_table_size = (flags >> 16) & 0x3FF;
+    handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
     return RESULT_SUCCESS;
 }
 
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
     }
 
     /// Gets the number of total allowable handles for the process' handle table.
-    u32 GetHandleTableSize() const {
+    s32 GetHandleTableSize() const {
         return handle_table_size;
     }
 
@@ -252,7 +252,7 @@ private:
     u64 core_mask = 0;
     u64 priority_mask = 0;
 
-    u32 handle_table_size = 0;
+    s32 handle_table_size = 0;
     u32 kernel_version = 0;
 
     ProgramType program_type = ProgramType::SysModule;
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 6973e580c..c2b798a4e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -14,7 +14,7 @@ namespace Kernel {
 ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
 ReadableEvent::~ReadableEvent() = default;
 
-bool ReadableEvent::ShouldWait(Thread* thread) const {
+bool ReadableEvent::ShouldWait(const Thread* thread) const {
     return !signaled;
 }
 
@@ -44,8 +44,4 @@ ResultCode ReadableEvent::Reset() {
     return RESULT_SUCCESS;
 }
 
-void ReadableEvent::WakeupAllWaitingThreads() {
-    WaitObject::WakeupAllWaitingThreads();
-}
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 80b3b0aba..84215f572 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -31,16 +31,14 @@ public:
         return reset_type;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ReadableEvent;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ReadableEvent;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
     void Acquire(Thread* thread) override;
 
-    void WakeupAllWaitingThreads() override;
-
     /// Unconditionally clears the readable event's state.
     void Clear();
 
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 2f9695005..173f69915 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -16,11 +16,8 @@ constexpr std::size_t ResourceTypeToIndex(ResourceType type) {
 ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {}
 ResourceLimit::~ResourceLimit() = default;
 
-SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel, std::string name) {
-    SharedPtr<ResourceLimit> resource_limit(new ResourceLimit(kernel));
-
-    resource_limit->name = std::move(name);
-    return resource_limit;
+SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel) {
+    return new ResourceLimit(kernel);
 }
 
 s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const {
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h
index 59dc11c22..2613a6bb5 100644
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -31,19 +31,17 @@ constexpr bool IsValidResourceType(ResourceType type) {
 
 class ResourceLimit final : public Object {
 public:
-    /**
-     * Creates a resource limit object.
-     */
-    static SharedPtr<ResourceLimit> Create(KernelCore& kernel, std::string name = "Unknown");
+    /// Creates a resource limit object.
+    static SharedPtr<ResourceLimit> Create(KernelCore& kernel);
 
     std::string GetTypeName() const override {
         return "ResourceLimit";
     }
     std::string GetName() const override {
-        return name;
+        return GetTypeName();
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ResourceLimit;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ResourceLimit;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
@@ -95,9 +93,6 @@ private:
     ResourceArray limits{};
     /// Current resource limit values.
     ResourceArray values{};
-
-    /// Name of resource limit object.
-    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index df4d6cf0a..e8447b69a 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
 
 std::mutex Scheduler::scheduler_mutex;
 
-Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
+Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
+    : cpu_core{cpu_core}, system{system} {}
 
 Scheduler::~Scheduler() {
     for (auto& thread : thread_list) {
@@ -28,8 +29,8 @@ Scheduler::~Scheduler() {
 }
 
 bool Scheduler::HaveReadyThreads() const {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
-    return ready_queue.get_first() != nullptr;
+    std::lock_guard lock{scheduler_mutex};
+    return !ready_queue.empty();
 }
 
 Thread* Scheduler::GetCurrentThread() const {
@@ -45,23 +46,28 @@ Thread* Scheduler::PopNextReadyThread() {
     Thread* thread = GetCurrentThread();
 
     if (thread && thread->GetStatus() == ThreadStatus::Running) {
+        if (ready_queue.empty()) {
+            return thread;
+        }
         // We have to do better than the current thread.
         // This call returns null when that's not possible.
-        next = ready_queue.pop_first_better(thread->GetPriority());
-        if (!next) {
-            // Otherwise just keep going with the current thread
+        next = ready_queue.front();
+        if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
             next = thread;
         }
     } else {
-        next = ready_queue.pop_first();
+        if (ready_queue.empty()) {
+            return nullptr;
+        }
+        next = ready_queue.front();
     }
 
     return next;
 }
 
 void Scheduler::SwitchContext(Thread* new_thread) {
-    Thread* const previous_thread = GetCurrentThread();
-    Process* const previous_process = Core::CurrentProcess();
+    Thread* previous_thread = GetCurrentThread();
+    Process* const previous_process = system.Kernel().CurrentProcess();
 
     UpdateLastContextSwitchTime(previous_thread, previous_process);
 
@@ -74,7 +80,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
         if (previous_thread->GetStatus() == ThreadStatus::Running) {
             // This is only the case when a reschedule is triggered without the current thread
             // yielding execution (i.e. an event triggered, system core time-sliced, etc)
-            ready_queue.push_front(previous_thread->GetPriority(), previous_thread);
+            ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
             previous_thread->SetStatus(ThreadStatus::Ready);
         }
     }
@@ -89,13 +95,12 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
         current_thread = new_thread;
 
-        ready_queue.remove(new_thread->GetPriority(), new_thread);
+        ready_queue.remove(new_thread, new_thread->GetPriority());
         new_thread->SetStatus(ThreadStatus::Running);
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
         if (previous_process != thread_owner_process) {
-            Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process);
-            SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
 
         cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +116,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = CoreTiming::GetTicks();
+    const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
     const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
 
     if (thread != nullptr) {
@@ -126,7 +131,7 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
 }
 
 void Scheduler::Reschedule() {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     Thread* cur = GetCurrentThread();
     Thread* next = PopNextReadyThread();
@@ -142,51 +147,54 @@ void Scheduler::Reschedule() {
     SwitchContext(next);
 }
 
-void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+void Scheduler::AddThread(SharedPtr<Thread> thread) {
+    std::lock_guard lock{scheduler_mutex};
 
     thread_list.push_back(std::move(thread));
-    ready_queue.prepare(priority);
 }
 
 void Scheduler::RemoveThread(Thread* thread) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                       thread_list.end());
 }
 
 void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.push_back(priority, thread);
+    ready_queue.add(thread, priority);
 }
 
 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.remove(priority, thread);
+    ready_queue.remove(thread, priority);
 }
 
 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
+    if (thread->GetPriority() == priority) {
+        return;
+    }
 
     // If thread was ready, adjust queues
     if (thread->GetStatus() == ThreadStatus::Ready)
-        ready_queue.move(thread, thread->GetPriority(), priority);
-    else
-        ready_queue.prepare(priority);
+        ready_queue.adjust(thread, thread->GetPriority(), priority);
 }
 
 Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     const u32 mask = 1U << core;
-    return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) {
-        return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority;
-    });
+    for (auto* thread : ready_queue) {
+        if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
+            return thread;
+        }
+    }
+    return nullptr;
 }
 
 void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
@@ -198,8 +206,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
     ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
 
     // Yield this thread -- sleep for zero time and force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);
 }
 
 void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +221,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
     ASSERT(priority < THREADPRIO_COUNT);
 
     // Sleep for zero time to be able to force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);
 
     Thread* suggested_thread = nullptr;
 
@@ -223,8 +229,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
     // Take the first non-nullptr one
     for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
         const auto res =
-            Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
-                core, priority);
+            system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
 
         // If scheduler provides a suggested thread
         if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..b29bf7be8 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -7,13 +7,14 @@
 #include <mutex>
 #include <vector>
 #include "common/common_types.h"
-#include "common/thread_queue_list.h"
+#include "common/multi_level_queue.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/thread.h"
 
 namespace Core {
 class ARM_Interface;
-}
+class System;
+} // namespace Core
 
 namespace Kernel {
 
@@ -21,7 +22,7 @@ class Process;
 
 class Scheduler final {
 public:
-    explicit Scheduler(Core::ARM_Interface& cpu_core);
+    explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
     ~Scheduler();
 
     /// Returns whether there are any threads that are ready to run.
@@ -37,7 +38,7 @@ public:
     u64 GetLastContextSwitchTicks() const;
 
     /// Adds a new thread to the scheduler
-    void AddThread(SharedPtr<Thread> thread, u32 priority);
+    void AddThread(SharedPtr<Thread> thread);
 
     /// Removes a thread from the scheduler
     void RemoveThread(Thread* thread);
@@ -155,13 +156,14 @@ private:
     std::vector<SharedPtr<Thread>> thread_list;
 
     /// Lists only ready thread ids.
-    Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
+    Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
 
     SharedPtr<Thread> current_thread = nullptr;
 
     Core::ARM_Interface& cpu_core;
     u64 last_context_switch_time = 0;
 
+    Core::System& system;
     static std::mutex scheduler_mutex;
 };
 
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index d6ceeb2da..02e7c60e6 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -26,7 +26,11 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
     return MakeResult(std::move(session));
 }
 
-bool ServerPort::ShouldWait(Thread* thread) const {
+void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
+    pending_sessions.push_back(std::move(pending_session));
+}
+
+bool ServerPort::ShouldWait(const Thread* thread) const {
     // If there are no pending sessions, we wait until a new one is added.
     return pending_sessions.empty();
 }
@@ -35,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) {
     ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }
 
-std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair(
-    KernelCore& kernel, u32 max_sessions, std::string name) {
-
+ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
+                                                std::string name) {
     SharedPtr<ServerPort> server_port(new ServerPort(kernel));
     SharedPtr<ClientPort> client_port(new ClientPort(kernel));
 
@@ -47,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP
     client_port->max_sessions = max_sessions;
     client_port->active_sessions = 0;
 
-    return std::make_tuple(std::move(server_port), std::move(client_port));
+    return std::make_pair(std::move(server_port), std::move(client_port));
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index e52f8245f..dc88a1ebd 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -6,7 +6,7 @@
 
 #include <memory>
 #include <string>
-#include <tuple>
+#include <utility>
 #include <vector>
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
@@ -22,6 +22,9 @@ class SessionRequestHandler;
 
 class ServerPort final : public WaitObject {
 public:
+    using HLEHandler = std::shared_ptr<SessionRequestHandler>;
+    using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>;
+
     /**
      * Creates a pair of ServerPort and an associated ClientPort.
      *
@@ -30,8 +33,8 @@ public:
      * @param name Optional name of the ports
-     * @return The created port tuple
+     * @return The created port pair
      */
-    static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair(
-        KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort");
+    static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions,
+                                   std::string name = "UnknownPort");
 
     std::string GetTypeName() const override {
         return "ServerPort";
@@ -40,7 +43,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ServerPort;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ServerPort;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
@@ -51,29 +54,44 @@ public:
      */
     ResultVal<SharedPtr<ServerSession>> Accept();
 
+    /// Whether or not this server port has an HLE handler available.
+    bool HasHLEHandler() const {
+        return hle_handler != nullptr;
+    }
+
+    /// Gets the HLE handler for this port.
+    HLEHandler GetHLEHandler() const {
+        return hle_handler;
+    }
+
     /**
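-     * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
+     * Sets the HLE handler template for the port. ServerSessions created by connecting to this port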
      * will inherit a reference to this handler.
      */
-    void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) {
+    void SetHleHandler(HLEHandler hle_handler_) {
         hle_handler = std::move(hle_handler_);
     }
 
-    std::string name; ///< Name of port (optional)
+    /// Appends a ServerSession to the collection of ServerSessions
+    /// waiting to be accepted by this port.
+    void AppendPendingSession(SharedPtr<ServerSession> pending_session);
+
+    bool ShouldWait(const Thread* thread) const override;
+    void Acquire(Thread* thread) override;
+
+private:
+    explicit ServerPort(KernelCore& kernel);
+    ~ServerPort() override;
 
     /// ServerSessions waiting to be accepted by the port
     std::vector<SharedPtr<ServerSession>> pending_sessions;
 
     /// This session's HLE request handler template (optional)
     /// ServerSessions created from this port inherit a reference to this handler.
-    std::shared_ptr<SessionRequestHandler> hle_handler;
-
-    bool ShouldWait(Thread* thread) const override;
-    void Acquire(Thread* thread) override;
+    HLEHandler hle_handler;
 
-private:
-    explicit ServerPort(KernelCore& kernel);
-    ~ServerPort() override;
+    /// Name of the port (optional)
+    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 027434f92..696a82cd9 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -28,11 +28,9 @@ ServerSession::~ServerSession() {
     // the emulated application.
 
     // Decrease the port's connection count.
-    if (parent->port)
+    if (parent->port) {
         parent->port->ConnectionClosed();
-
-    // TODO(Subv): Wake up all the ClientSession's waiting threads and set
-    // the SendSyncRequest result to 0xC920181A.
+    }
 
     parent->server = nullptr;
 }
@@ -46,7 +44,7 @@ ResultVal<SharedPtr<ServerSession>> ServerSession::Create(KernelCore& kernel, st
     return MakeResult(std::move(server_session));
 }
 
-bool ServerSession::ShouldWait(Thread* thread) const {
+bool ServerSession::ShouldWait(const Thread* thread) const {
     // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
     if (parent->client == nullptr)
         return false;
@@ -63,42 +61,74 @@ void ServerSession::Acquire(Thread* thread) {
     pending_requesting_threads.pop_back();
 }
 
+void ServerSession::ClientDisconnected() {
+    // We keep a shared pointer to the hle handler to keep it alive throughout
+    // the call to ClientDisconnected, as ClientDisconnected invalidates the
+    // hle_handler member itself during the course of the function executing.
+    std::shared_ptr<SessionRequestHandler> handler = hle_handler;
+    if (handler) {
+        // Note that after this returns, this server session's hle_handler is
+        // invalidated (set to null).
+        handler->ClientDisconnected(this);
+    }
+
+    // Clean up the list of client threads with pending requests, they are unneeded now that the
+    // client endpoint is closed.
+    pending_requesting_threads.clear();
+    currently_handling = nullptr;
+}
+
+void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
+    domain_request_handlers.push_back(std::move(handler));
+}
+
+std::size_t ServerSession::NumDomainRequestHandlers() const {
+    return domain_request_handlers.size();
+}
+
 ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
-    auto* const domain_message_header = context.GetDomainMessageHeader();
-    if (domain_message_header) {
-        // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
-        context.SetDomainRequestHandlers(domain_request_handlers);
-
-        // If there is a DomainMessageHeader, then this is CommandType "Request"
-        const u32 object_id{context.GetDomainMessageHeader()->object_id};
-        switch (domain_message_header->command) {
-        case IPC::DomainMessageHeader::CommandType::SendMessage:
-            if (object_id > domain_request_handlers.size()) {
-                LOG_CRITICAL(IPC,
-                             "object_id {} is too big! This probably means a recent service call "
-                             "to {} needed to return a new interface!",
-                             object_id, name);
-                UNREACHABLE();
-                return RESULT_SUCCESS; // Ignore error if asserts are off
-            }
-            return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
-
-        case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
-            LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
-
-            domain_request_handlers[object_id - 1] = nullptr;
-
-            IPC::ResponseBuilder rb{context, 2};
-            rb.Push(RESULT_SUCCESS);
-            return RESULT_SUCCESS;
-        }
+    if (!context.HasDomainMessageHeader()) {
+        return RESULT_SUCCESS;
+    }
+
+    // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
+    context.SetDomainRequestHandlers(domain_request_handlers);
+
+    // If there is a DomainMessageHeader, then this is CommandType "Request"
+    const auto& domain_message_header = context.GetDomainMessageHeader();
+    const u32 object_id{domain_message_header.object_id};
+    switch (domain_message_header.command) {
+    case IPC::DomainMessageHeader::CommandType::SendMessage:
+        // Object ids are 1-based, so 0 is just as invalid as an id past the end of the list.
+        if (object_id == 0 || object_id > domain_request_handlers.size()) {
+            LOG_CRITICAL(IPC,
+                         "object_id {} is too big! This probably means a recent service call "
+                         "to {} needed to return a new interface!",
+                         object_id, name);
+            UNREACHABLE();
+            return RESULT_SUCCESS; // Ignore error if asserts are off
         }
+        return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
+
+    case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
+        LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
 
-        LOG_CRITICAL(IPC, "Unknown domain command={}",
-                     static_cast<int>(domain_message_header->command.Value()));
-        ASSERT(false);
+        // Object ids are 1-based; only clear the slot if the id names an existing handler.
+        if (object_id != 0 && object_id <= domain_request_handlers.size()) {
+            domain_request_handlers[object_id - 1] = nullptr;
+        } else {
+            LOG_ERROR(IPC, "CloseVirtualHandle called with invalid object_id={}", object_id);
+        }
+
+        IPC::ResponseBuilder rb{context, 2};
+        rb.Push(RESULT_SUCCESS);
+        return RESULT_SUCCESS;
+    }
     }
 
+    LOG_CRITICAL(IPC, "Unknown domain command={}",
+                 static_cast<int>(domain_message_header.command.Value()));
+    ASSERT(false);
     return RESULT_SUCCESS;
 }
 
@@ -175,6 +205,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
     client_session->parent = parent;
     server_session->parent = parent;
 
-    return std::make_tuple(std::move(server_session), std::move(client_session));
+    return std::make_pair(std::move(server_session), std::move(client_session));
 }
 } // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index e0e9d64c8..738df30f8 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "core/hle/kernel/object.h"
@@ -41,12 +42,24 @@ public:
         return "ServerSession";
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ServerSession;
+    std::string GetName() const override {
+        return name;
+    }
+
+    static constexpr HandleType HANDLE_TYPE = HandleType::ServerSession;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
 
-    using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
+    Session* GetParent() {
+        return parent.get();
+    }
+
+    const Session* GetParent() const {
+        return parent.get();
+    }
+
+    using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
 
     /**
      * Creates a pair of ServerSession and an associated ClientSession.
@@ -74,27 +87,20 @@ public:
      */
     ResultCode HandleSyncRequest(SharedPtr<Thread> thread);
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
 
     void Acquire(Thread* thread) override;
 
-    std::string name;                ///< The name of this session (optional)
-    std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint.
-    std::shared_ptr<SessionRequestHandler>
-        hle_handler; ///< This session's HLE request handler (applicable when not a domain)
+    /// Called when a client disconnection occurs.
+    void ClientDisconnected();
 
-    /// This is the list of domain request handlers (after conversion to a domain)
-    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+    /// Adds a new domain request handler to the collection of request handlers within
+    /// this ServerSession instance.
+    void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
 
-    /// List of threads that are pending a response after a sync request. This list is processed in
-    /// a LIFO manner, thus, the last request will be dispatched first.
-    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
-    std::vector<SharedPtr<Thread>> pending_requesting_threads;
-
-    /// Thread whose request is currently being handled. A request is considered "handled" when a
-    /// response is sent via svcReplyAndReceive.
-    /// TODO(Subv): Find a better name for this.
-    SharedPtr<Thread> currently_handling;
+    /// Retrieves the total number of domain request handlers that have been
+    /// appended to this ServerSession instance.
+    std::size_t NumDomainRequestHandlers() const;
 
     /// Returns true if the session has been converted to a domain, otherwise False
     bool IsDomain() const {
@@ -129,8 +135,30 @@ private:
     /// object handle.
     ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
 
+    /// The parent session, which links to the client endpoint.
+    std::shared_ptr<Session> parent;
+
+    /// This session's HLE request handler (applicable when not a domain)
+    std::shared_ptr<SessionRequestHandler> hle_handler;
+
+    /// This is the list of domain request handlers (after conversion to a domain)
+    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+
+    /// List of threads that are pending a response after a sync request. This list is processed in
+    /// a LIFO manner, thus, the last request will be dispatched first.
+    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
+    std::vector<SharedPtr<Thread>> pending_requesting_threads;
+
+    /// Thread whose request is currently being handled. A request is considered "handled" when a
+    /// response is sent via svcReplyAndReceive.
+    /// TODO(Subv): Find a better name for this.
+    SharedPtr<Thread> currently_handling;
+
     /// When set to True, converts the session to a domain at the end of the command
     bool convert_to_domain{};
+
+    /// The name of this session (optional)
+    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 22d0c1dd5..f15c5ee36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,11 +6,9 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/shared_memory.h"
-#include "core/memory.h"
 
 namespace Kernel {
 
@@ -34,8 +32,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
         shared_memory->backing_block_offset = 0;
 
         // Refresh the address mappings for the current process.
-        if (Core::CurrentProcess() != nullptr) {
-            Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
+        if (kernel.CurrentProcess() != nullptr) {
+            kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
                 shared_memory->backing_block.get());
         }
     } else {
@@ -120,7 +118,15 @@ ResultCode SharedMemory::Map(Process& target_process, VAddr address, MemoryPermi
                                                      ConvertPermissions(permissions));
 }
 
-ResultCode SharedMemory::Unmap(Process& target_process, VAddr address) {
+ResultCode SharedMemory::Unmap(Process& target_process, VAddr address, u64 unmap_size) {
+    if (unmap_size != size) {
+        LOG_ERROR(Kernel,
+                  "Invalid size passed to Unmap. Size must be equal to the size of the "
+                  "memory managed. Shared memory size=0x{:016X}, Unmap size=0x{:016X}",
+                  size, unmap_size);
+        return ERR_INVALID_SIZE;
+    }
+
     // TODO(Subv): Verify what happens if the application tries to unmap an address that is not
     // mapped to a SharedMemory.
     return target_process.VMManager().UnmapRange(address, size);
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index dab2a6bea..c2b6155e1 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -76,7 +76,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::SharedMemory;
+    static constexpr HandleType HANDLE_TYPE = HandleType::SharedMemory;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
@@ -104,11 +104,17 @@ public:
 
     /**
      * Unmaps a shared memory block from the specified address in system memory
+     *
      * @param target_process Process from which to unmap the memory block.
-     * @param address Address in system memory where the shared memory block is mapped
+     * @param address        Address in system memory where the shared memory block is mapped.
+     * @param unmap_size     The number of bytes to unmap from this shared memory instance.
+     *
      * @return Result code of the unmap operation
+     *
+     * @pre The given size to unmap must be the same size as the amount of memory managed by
+     *      the SharedMemory instance itself, otherwise ERR_INVALID_SIZE will be returned.
      */
-    ResultCode Unmap(Process& target_process, VAddr address);
+    ResultCode Unmap(Process& target_process, VAddr address, u64 unmap_size);
 
     /**
      * Gets a pointer to the shared memory block
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 6588bd3b8..e5d4d6b55 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
@@ -31,6 +32,7 @@
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/svc_wrap.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -47,23 +49,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
     return address + size > address;
 }
 
-// Checks if a given address range lies within a larger address range.
-constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
-                                    VAddr address_range_end) {
-    const VAddr end_address = address + size - 1;
-    return address_range_begin <= address && end_address <= address_range_end - 1;
-}
-
-bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
-    return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
-                                vm.GetAddressSpaceEndAddress());
-}
-
-bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
-    return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
-                                vm.GetNewMapRegionEndAddress());
-}
-
 // 8 GiB
 constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
 
@@ -105,14 +90,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
+    if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
                   src_addr, size);
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
+    if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
                   dst_addr, size);
@@ -146,16 +131,15 @@ enum class ResourceLimitValueType {
     LimitValue,
 };
 
-ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type,
-                                          ResourceLimitValueType value_type) {
+ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
+                                          u32 resource_type, ResourceLimitValueType value_type) {
     const auto type = static_cast<ResourceType>(resource_type);
     if (!IsValidResourceType(type)) {
         LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
         return ERR_INVALID_ENUM_VALUE;
     }
 
-    const auto& kernel = Core::System::GetInstance().Kernel();
-    const auto* const current_process = kernel.CurrentProcess();
+    const auto* const current_process = system.Kernel().CurrentProcess();
     ASSERT(current_process != nullptr);
 
     const auto resource_limit_object =
@@ -175,7 +159,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_ty
 } // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
-static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
+static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
     LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
 
     // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
@@ -190,11 +174,8 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
         return ERR_INVALID_SIZE;
     }
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
-    const VAddr heap_base = vm_manager.GetHeapRegionBaseAddress();
-    const auto alloc_result =
-        vm_manager.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite);
-
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
+    const auto alloc_result = vm_manager.SetHeapSize(heap_size);
     if (alloc_result.Failed()) {
         return alloc_result.Code();
     }
@@ -203,7 +184,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
+static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) {
     LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot);
 
     if (!Common::Is4KBAligned(addr)) {
@@ -235,10 +216,10 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     auto& vm_manager = current_process->VMManager();
 
-    if (!IsInsideAddressSpace(vm_manager, addr, size)) {
+    if (!vm_manager.IsWithinAddressSpace(addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                   size);
@@ -260,7 +241,8 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
     return vm_manager.ReprotectRange(addr, size, converted_permissions);
 }
 
-static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attribute) {
+static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
+                                     u32 attribute) {
     LOG_DEBUG(Kernel_SVC,
               "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,
               size, mask, attribute);
@@ -298,8 +280,8 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
         return ERR_INVALID_COMBINATION;
     }
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
-    if (!IsInsideAddressSpace(vm_manager, address, size)) {
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
+    if (!vm_manager.IsWithinAddressSpace(address, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Given address (0x{:016X}) is outside the bounds of the address space.", address);
         return ERR_INVALID_ADDRESS_STATE;
@@ -309,11 +291,11 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
 }
 
 /// Maps a memory range into a different range.
-static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
+static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
     const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
 
     if (result.IsError()) {
@@ -324,11 +306,11 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
 }
 
 /// Unmaps a region that was previously mapped with svcMapMemory
-static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
+static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
     const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
 
     if (result.IsError()) {
@@ -339,7 +321,8 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
 }
 
 /// Connect to an OS service given the port name, returns the handle to the port to out
-static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address) {
+static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
+                                     VAddr port_name_address) {
     if (!Memory::IsValidVirtualAddress(port_name_address)) {
         LOG_ERROR(Kernel_SVC,
                   "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
@@ -358,8 +341,8 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
 
     LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    auto it = kernel.FindNamedPort(port_name);
+    auto& kernel = system.Kernel();
+    const auto it = kernel.FindNamedPort(port_name);
     if (!kernel.IsValidNamedPort(it)) {
         LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
         return ERR_NOT_FOUND;
@@ -371,14 +354,14 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
     CASCADE_RESULT(client_session, client_port->Connect());
 
     // Return the client session
-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
     CASCADE_RESULT(*out_handle, handle_table.Create(client_session));
     return RESULT_SUCCESS;
 }
 
 /// Makes a blocking IPC call to an OS service.
-static ResultCode SendSyncRequest(Handle handle) {
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     SharedPtr<ClientSession> session = handle_table.Get<ClientSession>(handle);
     if (!session) {
         LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
@@ -387,18 +370,18 @@ static ResultCode SendSyncRequest(Handle handle) {
 
     LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
 
-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();
 
     // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server
     // responds and cause a reschedule.
-    return session->SendSyncRequest(GetCurrentThread());
+    return session->SendSyncRequest(system.CurrentScheduler().GetCurrentThread());
 }
 
 /// Get the ID for the specified thread.
-static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) {
+static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) {
     LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", thread_handle);
@@ -410,10 +393,10 @@ static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) {
 }
 
 /// Gets the ID of the specified process or a specified thread's owning process.
-static ResultCode GetProcessId(u64* process_id, Handle handle) {
+static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Process> process = handle_table.Get<Process>(handle);
     if (process) {
         *process_id = process->GetProcessID();
@@ -455,8 +438,8 @@ static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thr
 };
 
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
-static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 handle_count,
-                                      s64 nano_seconds) {
+static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
+                                      u64 handle_count, s64 nano_seconds) {
     LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}",
               handles_address, handle_count, nano_seconds);
 
@@ -475,11 +458,11 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
         return ERR_OUT_OF_RANGE;
     }
 
-    auto* const thread = GetCurrentThread();
+    auto* const thread = system.CurrentScheduler().GetCurrentThread();
 
     using ObjectPtr = Thread::ThreadWaitObjects::value_type;
     Thread::ThreadWaitObjects objects(handle_count);
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
 
     for (u64 i = 0; i < handle_count; ++i) {
         const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle));
@@ -525,16 +508,16 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
     thread->WakeAfterDelay(nano_seconds);
     thread->SetWakeupCallback(DefaultThreadWakeupCallback);
 
-    Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
 
     return RESULT_TIMEOUT;
 }
 
 /// Resumes a thread waiting on WaitSynchronization
-static ResultCode CancelSynchronization(Handle thread_handle) {
+static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) {
     LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -549,8 +532,8 @@ static ResultCode CancelSynchronization(Handle thread_handle) {
 }
 
 /// Attempts to locks a mutex, creating it if it does not already exist
-static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
-                                Handle requesting_thread_handle) {
+static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
+                                VAddr mutex_addr, Handle requesting_thread_handle) {
     LOG_TRACE(Kernel_SVC,
               "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, "
               "requesting_current_thread_handle=0x{:08X}",
@@ -567,13 +550,13 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
         return ERR_INVALID_ADDRESS;
     }
 
-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
-    return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle,
-                             requesting_thread_handle);
+    auto* const current_process = system.Kernel().CurrentProcess();
+    return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
+                                                  requesting_thread_handle);
 }
 
 /// Unlock a mutex
-static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
+static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
     LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
 
     if (Memory::IsKernelVirtualAddress(mutex_addr)) {
@@ -587,7 +570,8 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
         return ERR_INVALID_ADDRESS;
     }
 
-    return Mutex::Release(mutex_addr);
+    auto* const current_process = system.Kernel().CurrentProcess();
+    return current_process->GetMutex().Release(mutex_addr);
 }
 
 enum class BreakType : u32 {
@@ -597,6 +581,7 @@ enum class BreakType : u32 {
     PostNROLoad = 4,
     PreNROUnload = 5,
     PostNROUnload = 6,
+    CppException = 7,
 };
 
 struct BreakReason {
@@ -608,7 +593,7 @@ struct BreakReason {
 };
 
 /// Break program execution
-static void Break(u32 reason, u64 info1, u64 info2) {
+static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
     BreakReason break_reason{reason};
     bool has_dumped_buffer{};
 
@@ -669,6 +654,9 @@ static void Break(u32 reason, u64 info1, u64 info2) {
                     "Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1,
                     info2);
         break;
+    case BreakType::CppException:
+        LOG_CRITICAL(Debug_Emulated, "Signalling debugger. Uncaught C++ exception encountered.");
+        break;
     default:
         LOG_WARNING(
             Debug_Emulated,
@@ -683,22 +671,24 @@ static void Break(u32 reason, u64 info1, u64 info2) {
             Debug_Emulated,
             "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
             reason, info1, info2);
+
         handle_debug_buffer(info1, info2);
-        Core::System::GetInstance()
-            .ArmInterface(static_cast<std::size_t>(GetCurrentThread()->GetProcessorID()))
-            .LogBacktrace();
+
+        auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+        const auto thread_processor_id = current_thread->GetProcessorID();
+        system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
         ASSERT(false);
 
-        Core::CurrentProcess()->PrepareForTermination();
+        system.Kernel().CurrentProcess()->PrepareForTermination();
 
         // Kill the current thread
-        GetCurrentThread()->Stop();
-        Core::System::GetInstance().PrepareReschedule();
+        current_thread->Stop();
+        system.PrepareReschedule();
     }
 }
 
 /// Used to output a message on a debug hardware unit - does nothing on a retail unit
-static void OutputDebugString(VAddr address, u64 len) {
+static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) {
     if (len == 0) {
         return;
     }
@@ -709,7 +699,8 @@ static void OutputDebugString(VAddr address, u64 len) {
 }
 
 /// Gets system/memory information for the current process
-static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) {
+static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,
+                          u64 info_sub_id) {
     LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
               info_sub_id, handle);
 
@@ -722,7 +713,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         HeapRegionBaseAddr = 4,
         HeapRegionSize = 5,
         TotalMemoryUsage = 6,
-        TotalHeapUsage = 7,
+        TotalPhysicalMemoryUsed = 7,
         IsCurrentProcessBeingDebugged = 8,
         RegisterResourceLimit = 9,
         IdleTickCount = 10,
@@ -758,7 +749,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
     case GetInfoType::NewMapRegionBaseAddr:
     case GetInfoType::NewMapRegionSize:
     case GetInfoType::TotalMemoryUsage:
-    case GetInfoType::TotalHeapUsage:
+    case GetInfoType::TotalPhysicalMemoryUsed:
     case GetInfoType::IsVirtualAddressMemoryEnabled:
     case GetInfoType::PersonalMmHeapUsage:
     case GetInfoType::TitleId:
@@ -767,7 +758,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_ENUM_VALUE;
         }
 
-        const auto& current_process_handle_table = Core::CurrentProcess()->GetHandleTable();
+        const auto& current_process_handle_table =
+            system.Kernel().CurrentProcess()->GetHandleTable();
         const auto process = current_process_handle_table.Get<Process>(static_cast<Handle>(handle));
         if (!process) {
             return ERR_INVALID_HANDLE;
@@ -818,8 +810,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             *result = process->VMManager().GetTotalMemoryUsage();
             return RESULT_SUCCESS;
 
-        case GetInfoType::TotalHeapUsage:
-            *result = process->VMManager().GetTotalHeapUsage();
+        case GetInfoType::TotalPhysicalMemoryUsed:
+            *result = process->GetTotalPhysicalMemoryUsed();
             return RESULT_SUCCESS;
 
         case GetInfoType::IsVirtualAddressMemoryEnabled:
@@ -857,7 +849,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_COMBINATION;
         }
 
-        Process* const current_process = Core::CurrentProcess();
+        Process* const current_process = system.Kernel().CurrentProcess();
         HandleTable& handle_table = current_process->GetHandleTable();
         const auto resource_limit = current_process->GetResourceLimit();
         if (!resource_limit) {
@@ -888,7 +880,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_COMBINATION;
         }
 
-        *result = Core::CurrentProcess()->GetRandomEntropy(info_sub_id);
+        *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
         return RESULT_SUCCESS;
 
     case GetInfoType::PrivilegedProcessId:
@@ -905,15 +897,15 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_COMBINATION;
         }
 
-        const auto thread =
-            Core::CurrentProcess()->GetHandleTable().Get<Thread>(static_cast<Handle>(handle));
+        const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<Thread>(
+            static_cast<Handle>(handle));
         if (!thread) {
             LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
                       static_cast<Handle>(handle));
             return ERR_INVALID_HANDLE;
         }
 
-        const auto& system = Core::System::GetInstance();
+        const auto& core_timing = system.CoreTiming();
         const auto& scheduler = system.CurrentScheduler();
         const auto* const current_thread = scheduler.GetCurrentThread();
         const bool same_thread = current_thread == thread;
@@ -923,9 +915,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
             const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
 
-            out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks);
+            out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
         } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
-            out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks;
+            out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
         }
 
         *result = out_ticks;
@@ -939,13 +931,13 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
 }
 
 /// Sets the thread activity
-static ResultCode SetThreadActivity(Handle handle, u32 activity) {
+static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
     if (activity > static_cast<u32>(ThreadActivity::Paused)) {
         return ERR_INVALID_ENUM_VALUE;
     }
 
-    const auto* current_process = Core::CurrentProcess();
+    const auto* current_process = system.Kernel().CurrentProcess();
     const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -962,7 +954,7 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) {
         return ERR_INVALID_HANDLE;
     }
 
-    if (thread == GetCurrentThread()) {
+    if (thread == system.CurrentScheduler().GetCurrentThread()) {
         LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
         return ERR_BUSY;
     }
@@ -972,10 +964,10 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) {
 }
 
 /// Gets the thread context
-static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
+static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle);
 
-    const auto* current_process = Core::CurrentProcess();
+    const auto* current_process = system.Kernel().CurrentProcess();
     const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -992,7 +984,7 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
         return ERR_INVALID_HANDLE;
     }
 
-    if (thread == GetCurrentThread()) {
+    if (thread == system.CurrentScheduler().GetCurrentThread()) {
         LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
         return ERR_BUSY;
     }
@@ -1013,10 +1005,10 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
 }
 
 /// Gets the priority for the specified thread
-static ResultCode GetThreadPriority(u32* priority, Handle handle) {
+static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) {
     LOG_TRACE(Kernel_SVC, "called");
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -1028,7 +1020,7 @@ static ResultCode GetThreadPriority(u32* priority, Handle handle) {
 }
 
 /// Sets the priority for the specified thread
-static ResultCode SetThreadPriority(Handle handle, u32 priority) {
+static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) {
     LOG_TRACE(Kernel_SVC, "called");
 
     if (priority > THREADPRIO_LOWEST) {
@@ -1039,7 +1031,7 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
         return ERR_INVALID_THREAD_PRIORITY;
     }
 
-    const auto* const current_process = Core::CurrentProcess();
+    const auto* const current_process = system.Kernel().CurrentProcess();
 
     SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
     if (!thread) {
@@ -1049,18 +1041,18 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
 
     thread->SetPriority(priority);
 
-    Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
 /// Get which CPU core is executing the current thread
-static u32 GetCurrentProcessorNumber() {
+static u32 GetCurrentProcessorNumber(Core::System& system) {
     LOG_TRACE(Kernel_SVC, "called");
-    return GetCurrentThread()->GetProcessorID();
+    return system.CurrentScheduler().GetCurrentThread()->GetProcessorID();
 }
 
-static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size,
-                                  u32 permissions) {
+static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
+                                  u64 size, u32 permissions) {
     LOG_TRACE(Kernel_SVC,
               "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
               shared_memory_handle, addr, size, permissions);
@@ -1094,7 +1086,7 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle);
     if (!shared_memory) {
         LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
@@ -1112,7 +1104,8 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
     return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare);
 }
 
-static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) {
+static ResultCode UnmapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
+                                    u64 size) {
     LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}",
                 shared_memory_handle, addr, size);
 
@@ -1137,7 +1130,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle);
     if (!shared_memory) {
         LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
@@ -1152,13 +1145,14 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
         return ERR_INVALID_MEMORY_RANGE;
     }
 
-    return shared_memory->Unmap(*current_process, addr);
+    return shared_memory->Unmap(*current_process, addr, size);
 }
 
-static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address,
-                                     Handle process_handle, VAddr address) {
+static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
+                                     VAddr page_info_address, Handle process_handle,
+                                     VAddr address) {
     LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     SharedPtr<Process> process = handle_table.Get<Process>(process_handle);
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
@@ -1184,20 +1178,20 @@ static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_
     return RESULT_SUCCESS;
 }
 
-static ResultCode QueryMemory(VAddr memory_info_address, VAddr page_info_address,
-                              VAddr query_address) {
+static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address,
+                              VAddr page_info_address, VAddr query_address) {
     LOG_TRACE(Kernel_SVC,
               "called, memory_info_address=0x{:016X}, page_info_address=0x{:016X}, "
               "query_address=0x{:016X}",
               memory_info_address, page_info_address, query_address);
 
-    return QueryProcessMemory(memory_info_address, page_info_address, CurrentProcess,
+    return QueryProcessMemory(system, memory_info_address, page_info_address, CurrentProcess,
                               query_address);
 }
 
 /// Exits the current process
-static void ExitProcess() {
-    auto* current_process = Core::CurrentProcess();
+static void ExitProcess(Core::System& system) {
+    auto* current_process = system.Kernel().CurrentProcess();
 
     LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
     ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
@@ -1206,20 +1200,20 @@ static void ExitProcess() {
     current_process->PrepareForTermination();
 
     // Kill the current thread
-    GetCurrentThread()->Stop();
+    system.CurrentScheduler().GetCurrentThread()->Stop();
 
-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();
 }
 
 /// Creates a new thread
-static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top,
-                               u32 priority, s32 processor_id) {
+static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
+                               VAddr stack_top, u32 priority, s32 processor_id) {
     LOG_TRACE(Kernel_SVC,
               "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
               "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
               entry_point, arg, stack_top, priority, processor_id, *out_handle);
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
 
     if (processor_id == THREADPROCESSORID_IDEAL) {
         // Set the target CPU to the one specified by the process.
@@ -1251,7 +1245,7 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
     }
 
     const std::string name = fmt::format("thread-{:X}", entry_point);
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     CASCADE_RESULT(SharedPtr<Thread> thread,
                    Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top,
                                   *current_process));
@@ -1265,16 +1259,16 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
     thread->SetGuestHandle(*new_guest_handle);
     *out_handle = *new_guest_handle;
 
-    Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
 
     return RESULT_SUCCESS;
 }
 
 /// Starts the thread for the provided handle
-static ResultCode StartThread(Handle thread_handle) {
+static ResultCode StartThread(Core::System& system, Handle thread_handle) {
     LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1287,22 +1281,24 @@ static ResultCode StartThread(Handle thread_handle) {
     thread->ResumeFromWait();
 
     if (thread->GetStatus() == ThreadStatus::Ready) {
-        Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+        system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
     }
 
     return RESULT_SUCCESS;
 }
 
 /// Called when a thread exits
-static void ExitThread() {
-    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
+static void ExitThread(Core::System& system) {
+    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
 
-    ExitCurrentThread();
-    Core::System::GetInstance().PrepareReschedule();
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    current_thread->Stop();
+    system.CurrentScheduler().RemoveThread(current_thread);
+    system.PrepareReschedule();
 }
 
 /// Sleep the current thread
-static void SleepThread(s64 nanoseconds) {
+static void SleepThread(Core::System& system, s64 nanoseconds) {
     LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
 
     enum class SleepType : s64 {
@@ -1311,72 +1307,91 @@ static void SleepThread(s64 nanoseconds) {
         YieldAndWaitForLoadBalancing = -2,
     };
 
+    auto& scheduler = system.CurrentScheduler();
+    auto* const current_thread = scheduler.GetCurrentThread();
+
     if (nanoseconds <= 0) {
-        auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
         switch (static_cast<SleepType>(nanoseconds)) {
         case SleepType::YieldWithoutLoadBalancing:
-            scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithoutLoadBalancing(current_thread);
             break;
         case SleepType::YieldWithLoadBalancing:
-            scheduler.YieldWithLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithLoadBalancing(current_thread);
             break;
         case SleepType::YieldAndWaitForLoadBalancing:
-            scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
+            scheduler.YieldAndWaitForLoadBalancing(current_thread);
             break;
         default:
             UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
         }
     } else {
-        // Sleep current thread and check for next thread to schedule
-        WaitCurrentThread_Sleep();
-
-        // Create an event to wake the thread up after the specified nanosecond delay has passed
-        GetCurrentThread()->WakeAfterDelay(nanoseconds);
+        current_thread->Sleep(nanoseconds);
     }
 
     // Reschedule all CPU cores
-    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
-        Core::System::GetInstance().CpuCore(i).PrepareReschedule();
+    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
+        system.CpuCore(i).PrepareReschedule();
+    }
 }
 
 /// Wait process wide key atomic
-static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr,
-                                           Handle thread_handle, s64 nano_seconds) {
+static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr,
+                                           VAddr condition_variable_addr, Handle thread_handle,
+                                           s64 nano_seconds) {
     LOG_TRACE(
         Kernel_SVC,
         "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
         mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    if (Memory::IsKernelVirtualAddress(mutex_addr)) {
+        LOG_ERROR(
+            Kernel_SVC,
+            "Given mutex address must not be within the kernel address space. address=0x{:016X}",
+            mutex_addr);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!Common::IsWordAligned(mutex_addr)) {
+        LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}",
+                  mutex_addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    auto* const current_process = system.Kernel().CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
     SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     ASSERT(thread);
 
-    CASCADE_CODE(Mutex::Release(mutex_addr));
+    const auto release_result = current_process->GetMutex().Release(mutex_addr);
+    if (release_result.IsError()) {
+        return release_result;
+    }
 
-    SharedPtr<Thread> current_thread = GetCurrentThread();
+    SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
     current_thread->SetCondVarWaitAddress(condition_variable_addr);
     current_thread->SetMutexWaitAddress(mutex_addr);
     current_thread->SetWaitHandle(thread_handle);
-    current_thread->SetStatus(ThreadStatus::WaitMutex);
+    current_thread->SetStatus(ThreadStatus::WaitCondVar);
     current_thread->InvalidateWakeupCallback();
 
     current_thread->WakeAfterDelay(nano_seconds);
 
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
 /// Signal process wide key
-static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target) {
+static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
+                                       s32 target) {
     LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
               condition_variable_addr, target);
 
-    const auto RetrieveWaitingThreads = [](std::size_t core_index,
-                                           std::vector<SharedPtr<Thread>>& waiting_threads,
-                                           VAddr condvar_addr) {
-        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+    const auto RetrieveWaitingThreads = [&system](std::size_t core_index,
+                                                  std::vector<SharedPtr<Thread>>& waiting_threads,
+                                                  VAddr condvar_addr) {
+        const auto& scheduler = system.Scheduler(core_index);
         const auto& thread_list = scheduler.GetThreadList();
 
         for (const auto& thread : thread_list) {
@@ -1401,10 +1416,10 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     // them all.
     std::size_t last = waiting_threads.size();
     if (target != -1)
-        last = target;
+        last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
 
     // If there are no threads waiting on this condition variable, just exit
-    if (last > waiting_threads.size())
+    if (last == 0)
         return RESULT_SUCCESS;
 
     for (std::size_t index = 0; index < last; ++index) {
@@ -1412,9 +1427,11 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
         ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
 
-        std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
+        // Clear the condition variable wait address: the thread no longer waits on it.
+        thread->SetCondVarWaitAddress(0);
 
-        auto& monitor = Core::System::GetInstance().Monitor();
+        const std::size_t current_core = system.CurrentCoreIndex();
+        auto& monitor = system.Monitor();
 
         // Atomically read the value of the mutex.
         u32 mutex_val = 0;
@@ -1430,10 +1447,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
             }
         } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
                                            thread->GetWaitHandle()));
-
         if (mutex_val == 0) {
             // We were able to acquire the mutex, resume this thread.
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
             thread->ResumeFromWait();
 
             auto* const lock_owner = thread->GetLockOwner();
@@ -1443,8 +1459,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
             thread->SetLockOwner(nullptr);
             thread->SetMutexWaitAddress(0);
-            thread->SetCondVarWaitAddress(0);
             thread->SetWaitHandle(0);
+            system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
         } else {
             // Atomically signal that the mutex now has a waiting thread.
             do {
@@ -1460,15 +1476,14 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
             // The mutex is already owned by some other thread, make this thread wait on it.
             const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
-            const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+            const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
             thread->InvalidateWakeupCallback();
+            thread->SetStatus(ThreadStatus::WaitMutex);
 
             owner->AddMutexWaiter(thread);
-
-            Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
         }
     }
 
@@ -1476,93 +1491,77 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 }
 
 // Wait for an address (via Address Arbiter)
-static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) {
+static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
+                                 s64 timeout) {
     LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
                 address, type, value, timeout);
+
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
         return ERR_INVALID_ADDRESS_STATE;
     }
+
     // If the address is not properly aligned to 4 bytes, return invalid address.
     if (!Common::IsWordAligned(address)) {
         LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
         return ERR_INVALID_ADDRESS;
     }
 
-    switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
-    case AddressArbiter::ArbitrationType::WaitIfLessThan:
-        return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
-    case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
-        return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
-    case AddressArbiter::ArbitrationType::WaitIfEqual:
-        return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
-    default:
-        LOG_ERROR(Kernel_SVC,
-                  "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
-                  "or WaitIfEqual but got {}",
-                  type);
-        return ERR_INVALID_ENUM_VALUE;
-    }
+    const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
+    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
+    return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
 }
 
 // Signals to an address (via Address Arbiter)
-static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) {
+static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
+                                  s32 num_to_wake) {
     LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
                 address, type, value, num_to_wake);
+
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
         return ERR_INVALID_ADDRESS_STATE;
     }
+
     // If the address is not properly aligned to 4 bytes, return invalid address.
     if (!Common::IsWordAligned(address)) {
         LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
         return ERR_INVALID_ADDRESS;
     }
 
-    switch (static_cast<AddressArbiter::SignalType>(type)) {
-    case AddressArbiter::SignalType::Signal:
-        return AddressArbiter::SignalToAddress(address, num_to_wake);
-    case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
-        return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
-    case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
-        return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
-                                                                             num_to_wake);
-    default:
-        LOG_ERROR(Kernel_SVC,
-                  "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
-                  "or ModifyByWaitingCountAndSignalIfEqual but got {}",
-                  type);
-        return ERR_INVALID_ENUM_VALUE;
-    }
+    const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
+    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
+    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }
 
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
-static u64 GetSystemTick() {
+static u64 GetSystemTick(Core::System& system) {
     LOG_TRACE(Kernel_SVC, "called");
 
-    const u64 result{CoreTiming::GetTicks()};
+    auto& core_timing = system.CoreTiming();
+    const u64 result{core_timing.GetTicks()};
 
     // Advance time to defeat dumb games that busy-wait for the frame to end.
-    CoreTiming::AddTicks(400);
+    core_timing.AddTicks(400);
 
     return result;
 }
 
 /// Close a handle
-static ResultCode CloseHandle(Handle handle) {
+static ResultCode CloseHandle(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle);
 
-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     return handle_table.Close(handle);
 }
 
 /// Clears the signaled state of an event or process.
-static ResultCode ResetSignal(Handle handle) {
+static ResultCode ResetSignal(Core::System& system, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
 
     auto event = handle_table.Get<ReadableEvent>(handle);
     if (event) {
@@ -1579,7 +1578,8 @@ static ResultCode ResetSignal(Handle handle) {
 }
 
 /// Creates a TransferMemory object
-static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 permissions) {
+static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,
+                                       u32 permissions) {
     LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
               permissions);
 
@@ -1607,19 +1607,129 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    auto process = kernel.CurrentProcess();
-    auto& handle_table = process->GetHandleTable();
-    const auto shared_mem_handle = SharedMemory::Create(kernel, process, size, perms, perms, addr);
+    auto& kernel = system.Kernel();
+    auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
 
-    CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
+    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
+    const auto result = handle_table.Create(std::move(transfer_mem_handle));
+    if (result.Failed()) {
+        return result.Code();
+    }
+
+    *handle = *result;
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
+static ResultCode MapTransferMemory(Core::System& system, Handle handle, VAddr address, u64 size,
+                                    u32 permission_raw) {
+    // Validates the request, then maps the transfer memory named by `handle` at `address`.
+    LOG_DEBUG(Kernel_SVC,
+              "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
+              handle, address, size, permission_raw);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory addresses must be 4KB aligned (address=0x{:016X}).", address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+    // Only None, Read and ReadWrite are accepted as mapping permissions.
+    const auto permissions = static_cast<MemoryPermission>(permission_raw);
+    if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read &&
+        permissions != MemoryPermission::ReadWrite) {
+        LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).",
+                  permission_raw);
+        return ERR_INVALID_STATE;
+    }
+
+    const auto* const current_process = system.Kernel().CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->MapMemory(address, size, permissions);
+}
+
+static ResultCode UnmapTransferMemory(Core::System& system, Handle handle, VAddr address,
+                                      u64 size) {
+    // Validates the request, then unmaps the transfer memory named by `handle` from `address`.
+    LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
+              address, size);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory addresses must be 4KB aligned (address=0x{:016X}).", address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto* const current_process = system.Kernel().CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->UnmapMemory(address, size);
+}
+
+static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,
+                                    u64* mask) {
     LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1633,11 +1743,12 @@ static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask)
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
+static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
+                                    u64 mask) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
               mask, core);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1682,8 +1793,8 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permissions,
-                                     u32 remote_permissions) {
+static ResultCode CreateSharedMemory(Core::System& system, Handle* handle, u64 size,
+                                     u32 local_permissions, u32 remote_permissions) {
     LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size,
               local_permissions, remote_permissions);
     if (size == 0) {
@@ -1719,7 +1830,7 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     auto process = kernel.CurrentProcess();
     auto& handle_table = process->GetHandleTable();
     auto shared_mem_handle = SharedMemory::Create(kernel, process, size, local_perms, remote_perms);
@@ -1728,10 +1839,10 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
     return RESULT_SUCCESS;
 }
 
-static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) {
+static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) {
     LOG_DEBUG(Kernel_SVC, "called");
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     const auto [readable_event, writable_event] =
         WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
 
@@ -1756,10 +1867,10 @@ static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode ClearEvent(Handle handle) {
+static ResultCode ClearEvent(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
 
     auto writable_event = handle_table.Get<WritableEvent>(handle);
     if (writable_event) {
@@ -1777,10 +1888,10 @@ static ResultCode ClearEvent(Handle handle) {
     return ERR_INVALID_HANDLE;
 }
 
-static ResultCode SignalEvent(Handle handle) {
+static ResultCode SignalEvent(Core::System& system, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle);
 
-    HandleTable& handle_table = Core::CurrentProcess()->GetHandleTable();
+    HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     auto writable_event = handle_table.Get<WritableEvent>(handle);
 
     if (!writable_event) {
@@ -1792,7 +1903,7 @@ static ResultCode SignalEvent(Handle handle) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
+static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type);
 
     // This function currently only allows retrieving a process' status.
@@ -1800,7 +1911,7 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
         Status,
     };
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const auto process = handle_table.Get<Process>(process_handle);
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
@@ -1818,10 +1929,10 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode CreateResourceLimit(Handle* out_handle) {
+static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) {
     LOG_DEBUG(Kernel_SVC, "called");
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     auto resource_limit = ResourceLimit::Create(kernel);
 
     auto* const current_process = kernel.CurrentProcess();
@@ -1836,11 +1947,11 @@ static ResultCode CreateResourceLimit(Handle* out_handle) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit,
-                                             u32 resource_type) {
+static ResultCode GetResourceLimitLimitValue(Core::System& system, u64* out_value,
+                                             Handle resource_limit, u32 resource_type) {
     LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
 
-    const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type,
+    const auto limit_value = RetrieveResourceLimitValue(system, resource_limit, resource_type,
                                                         ResourceLimitValueType::LimitValue);
     if (limit_value.Failed()) {
         return limit_value.Code();
@@ -1850,11 +1961,11 @@ static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_lim
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit,
-                                               u32 resource_type) {
+static ResultCode GetResourceLimitCurrentValue(Core::System& system, u64* out_value,
+                                               Handle resource_limit, u32 resource_type) {
     LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
 
-    const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type,
+    const auto current_value = RetrieveResourceLimitValue(system, resource_limit, resource_type,
                                                           ResourceLimitValueType::CurrentValue);
     if (current_value.Failed()) {
         return current_value.Code();
@@ -1864,7 +1975,8 @@ static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_l
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) {
+static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resource_limit,
+                                             u32 resource_type, u64 value) {
     LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit,
               resource_type, value);
 
@@ -1874,8 +1986,7 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
         return ERR_INVALID_ENUM_VALUE;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    auto* const current_process = kernel.CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     ASSERT(current_process != nullptr);
 
     auto resource_limit_object =
@@ -1899,9 +2010,86 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
     return RESULT_SUCCESS;
 }
 
+static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
+                                 VAddr out_process_ids, u32 out_process_ids_size) {
+    LOG_DEBUG(Kernel_SVC, "called. out_process_ids=0x{:016X}, out_process_ids_size={}",
+              out_process_ids, out_process_ids_size);
+
+    // If the supplied size is negative or greater than INT32_MAX / sizeof(u64), bail.
+    if ((out_process_ids_size & 0xF0000000) != 0) {
+        LOG_ERROR(Kernel_SVC,
+                  "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
+                  out_process_ids_size);
+        return ERR_OUT_OF_RANGE;
+    }
+
+    const auto& kernel = system.Kernel();
+    const auto& vm_manager = kernel.CurrentProcess()->VMManager();
+    const auto total_copy_size = out_process_ids_size * sizeof(u64);
+    // Only validate the output range when at least one ID could be written.
+    if (out_process_ids_size > 0 &&
+        !vm_manager.IsWithinAddressSpace(out_process_ids, total_copy_size)) {
+        LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
+                  out_process_ids, out_process_ids + total_copy_size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& process_list = kernel.GetProcessList();
+    const auto num_processes = process_list.size();
+    const auto copy_amount = std::min(std::size_t{out_process_ids_size}, num_processes);
+    // Write at most copy_amount IDs, advancing the guest address by one u64 per entry.
+    for (std::size_t i = 0; i < copy_amount; ++i) {
+        Memory::Write64(out_process_ids, process_list[i]->GetProcessID());
+        out_process_ids += sizeof(u64);
+    }
+    // Report the total process count, even if fewer IDs were written.
+    *out_num_processes = static_cast<u32>(num_processes);
+    return RESULT_SUCCESS;
+}
+
+static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
+                                u32 out_thread_ids_size, Handle debug_handle) {
+    // TODO: Handle this case when debug events are supported.
+    UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
+
+    LOG_DEBUG(Kernel_SVC, "called. out_thread_ids=0x{:016X}, out_thread_ids_size={}",
+              out_thread_ids, out_thread_ids_size);
+
+    // If the supplied size is negative or greater than INT32_MAX / sizeof(u64), bail.
+    if ((out_thread_ids_size & 0xF0000000) != 0) {
+        LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
+                  out_thread_ids_size);
+        return ERR_OUT_OF_RANGE;
+    }
+
+    const auto* const current_process = system.Kernel().CurrentProcess();
+    const auto& vm_manager = current_process->VMManager();
+    const auto total_copy_size = out_thread_ids_size * sizeof(u64);
+    // Only validate the output range when at least one ID could be written.
+    if (out_thread_ids_size > 0 &&
+        !vm_manager.IsWithinAddressSpace(out_thread_ids, total_copy_size)) {
+        LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
+                  out_thread_ids, out_thread_ids + total_copy_size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& thread_list = current_process->GetThreadList();
+    const auto num_threads = thread_list.size();
+    const auto copy_amount = std::min(std::size_t{out_thread_ids_size}, num_threads);
+    // Walk the list by iterator in step with the index, writing at most copy_amount IDs.
+    auto list_iter = thread_list.cbegin();
+    for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) {
+        Memory::Write64(out_thread_ids, (*list_iter)->GetThreadID());
+        out_thread_ids += sizeof(u64);
+    }
+    // Report the total thread count, even if fewer IDs were written.
+    *out_num_threads = static_cast<u32>(num_threads);
+    return RESULT_SUCCESS;
+}
+
 namespace {
 struct FunctionDef {
-    using Func = void();
+    using Func = void(Core::System&);
 
     u32 id;
     Func* func;
@@ -1991,8 +2179,8 @@ static const FunctionDef SVC_Table[] = {
     {0x4E, nullptr, "ReadWriteRegister"},
     {0x4F, nullptr, "SetProcessActivity"},
     {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"},
-    {0x51, nullptr, "MapTransferMemory"},
-    {0x52, nullptr, "UnmapTransferMemory"},
+    {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"},
+    {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"},
     {0x53, nullptr, "CreateInterruptEvent"},
     {0x54, nullptr, "QueryPhysicalAddress"},
     {0x55, nullptr, "QueryIoMapping"},
@@ -2011,8 +2199,8 @@ static const FunctionDef SVC_Table[] = {
     {0x62, nullptr, "TerminateDebugProcess"},
     {0x63, nullptr, "GetDebugEvent"},
     {0x64, nullptr, "ContinueDebugEvent"},
-    {0x65, nullptr, "GetProcessList"},
-    {0x66, nullptr, "GetThreadList"},
+    {0x65, SvcWrap<GetProcessList>, "GetProcessList"},
+    {0x66, SvcWrap<GetThreadList>, "GetThreadList"},
     {0x67, nullptr, "GetDebugThreadContext"},
     {0x68, nullptr, "SetDebugThreadContext"},
     {0x69, nullptr, "QueryDebugProcessMemory"},
@@ -2050,16 +2238,16 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
 
 MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 
-void CallSVC(u32 immediate) {
+void CallSVC(Core::System& system, u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
     // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
 
     const FunctionDef* info = GetSVCInfo(immediate);
     if (info) {
         if (info->func) {
-            info->func();
+            info->func(system);
         } else {
             LOG_CRITICAL(Kernel_SVC, "Unimplemented SVC function {}(..)", info->name);
         }
diff --git a/src/core/hle/kernel/svc.h b/src/core/hle/kernel/svc.h
index c37ae0f98..c5539ac1c 100644
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@@ -6,8 +6,12 @@
 
 #include "common/common_types.h"
 
+namespace Core {
+class System;
+}
+
 namespace Kernel {
 
-void CallSVC(u32 immediate);
+void CallSVC(Core::System& system, u32 immediate);
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 2a2c2c5ea..b3690b5f3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -11,270 +11,312 @@
 
 namespace Kernel {
 
-static inline u64 Param(int n) {
-    return Core::CurrentArmInterface().GetReg(n);
+static inline u64 Param(const Core::System& system, int n) {
+    return system.CurrentArmInterface().GetReg(n);
 }
 
 /**
  * HLE a function return from the current ARM userland process
- * @param res Result to return
+ * @param system System context
+ * @param result Result to return
  */
-static inline void FuncReturn(u64 res) {
-    Core::CurrentArmInterface().SetReg(0, res);
+static inline void FuncReturn(Core::System& system, u64 result) {
+    system.CurrentArmInterface().SetReg(0, result);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type ResultCode
 
-template <ResultCode func(u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0)).raw);
+template <ResultCode func(Core::System&, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0)).raw);
 }
 
-template <ResultCode func(u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0))).raw);
+template <ResultCode func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
 }
 
-template <ResultCode func(u32, u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u32, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(
+        system,
+        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw);
 }
 
-template <ResultCode func(u32*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*)>
+void SvcWrap(Core::System& system) {
     u32 param = 0;
-    const u32 retval = func(&param).raw;
-    Core::CurrentArmInterface().SetReg(1, param);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param).raw;
+    system.CurrentArmInterface().SetReg(1, param);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u32*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u32*)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
     u32 param_2 = 0;
-    const u32 retval = func(&param_1, &param_2).raw;
+    const u32 retval = func(system, &param_1, &param_2).raw;
 
-    auto& arm_interface = Core::CurrentArmInterface();
+    auto& arm_interface = system.CurrentArmInterface();
     arm_interface.SetReg(1, param_1);
     arm_interface.SetReg(2, param_2);
 
-    FuncReturn(retval);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    const u32 retval = func(&param_1, Param(1)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1)).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
+}
+
+template <ResultCode func(Core::System&, u32*, u64, u32)>
+void SvcWrap(Core::System& system) {
+    u32 param_1 = 0;
+    const u32 retval =
+        func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64*, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u32)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    const u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64, s32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
 }
 
-template <ResultCode func(u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
 }
 
-template <ResultCode func(u64*, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u64)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, Param(1)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1)).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64*, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u32, u32)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, static_cast<u32>(Param(1)), static_cast<u32>(Param(2))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)),
+                            static_cast<u32>(Param(system, 2)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw);
+template <ResultCode func(Core::System&, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw);
 }
 
-template <ResultCode func(u32, u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)),
+                            static_cast<u32>(Param(system, 1)), Param(system, 2))
+                           .raw);
 }
 
-template <ResultCode func(u32, u32*, u64*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32, u32*, u64*)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
     u64 param_2 = 0;
-    ResultCode retval = func(static_cast<u32>(Param(2)), &param_1, &param_2);
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    Core::CurrentArmInterface().SetReg(2, param_2);
-    FuncReturn(retval.raw);
-}
+    const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2);
 
-template <ResultCode func(u64, u64, u32, u32)>
-void SvcWrap() {
-    FuncReturn(
-        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw);
+    system.CurrentArmInterface().SetReg(1, param_1);
+    system.CurrentArmInterface().SetReg(2, param_2);
+    FuncReturn(system, retval.raw);
 }
 
-template <ResultCode func(u64, u64, u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2)), Param(3)).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u32, u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), Param(system, 3))
+                           .raw);
 }
 
-template <ResultCode func(u64, u64, u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)))
+                           .raw);
 }
 
-template <ResultCode func(u64, u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2))).raw);
+template <ResultCode func(Core::System&, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw);
 }
 
-template <ResultCode func(u32, u64, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
     FuncReturn(
-        func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw);
+        system,
+        func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
 }
 
-template <ResultCode func(u32, u64, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            Param(system, 2), static_cast<u32>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u32*, u64, u64, s64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(
+        system,
+        func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
+}
+
+template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    ResultCode retval =
-        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3)));
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval.raw);
+    const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
+                            static_cast<s64>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64, u64, u32, s64)>
-void SvcWrap() {
-    FuncReturn(
-        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, s64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u64*, u64, u64, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), Param(3)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval =
+        func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64, u64, u64, u32, s32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)),
-                      static_cast<s32>(Param(5)))
-                     .raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3),
+                            static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2),
+                            static_cast<u32>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(Handle*, u64, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, Handle*, u64, u32, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval =
-        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
+                            static_cast<u32>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64, u32, s32, s64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
-                    static_cast<s64>(Param(3)))
-                   .raw);
+template <ResultCode func(Core::System&, u64, u32, s32, s64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u64, u32, s32, s32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
-                    static_cast<s32>(Param(3)))
-                   .raw);
+template <ResultCode func(Core::System&, u64, u32, s32, s32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                           .raw);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type u32
 
-template <u32 func()>
-void SvcWrap() {
-    FuncReturn(func());
+template <u32 func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system));
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type u64
 
-template <u64 func()>
-void SvcWrap() {
-    FuncReturn(func());
+template <u64 func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system));
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 /// Function wrappers that return type void
 
-template <void func()>
-void SvcWrap() {
-    func();
+template <void func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    func(system);
 }
 
-template <void func(s64)>
-void SvcWrap() {
-    func(static_cast<s64>(Param(0)));
+template <void func(Core::System&, s64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<s64>(Param(system, 0)));
 }
 
-template <void func(u64, u64 len)>
-void SvcWrap() {
-    func(Param(0), Param(1));
+template <void func(Core::System&, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), Param(system, 1));
 }
 
-template <void func(u64, u64, u64)>
-void SvcWrap() {
-    func(Param(0), Param(1), Param(2));
+template <void func(Core::System&, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), Param(system, 1), Param(system, 2));
 }
 
-template <void func(u32, u64, u64)>
-void SvcWrap() {
-    func(static_cast<u32>(Param(0)), Param(1), Param(2));
+template <void func(Core::System&, u32, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d3984dfc4..1b891f632 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
 #include <optional>
 #include <vector>
 
-#include <boost/range/algorithm_ext/erase.hpp>
-
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -30,7 +28,7 @@
 
 namespace Kernel {
 
-bool Thread::ShouldWait(Thread* thread) const {
+bool Thread::ShouldWait(const Thread* thread) const {
     return status != ThreadStatus::Dead;
 }
 
@@ -43,7 +41,8 @@ Thread::~Thread() = default;
 
 void Thread::Stop() {
     // Cancel any outstanding wakeup events for this thread
-    CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
+                                                             callback_handle);
     kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
     callback_handle = 0;
 
@@ -63,21 +62,12 @@ void Thread::Stop() {
     }
     wait_objects.clear();
 
+    owner_process->UnregisterThread(this);
+
     // Mark the TLS slot in the thread's page as free.
     owner_process->FreeTLSSlot(tls_address);
 }
 
-void WaitCurrentThread_Sleep() {
-    Thread* thread = GetCurrentThread();
-    thread->SetStatus(ThreadStatus::WaitSleep);
-}
-
-void ExitCurrentThread() {
-    Thread* thread = GetCurrentThread();
-    thread->Stop();
-    Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
-}
-
 void Thread::WakeAfterDelay(s64 nanoseconds) {
     // Don't schedule a wakeup if the thread wants to wait forever
     if (nanoseconds == -1)
@@ -85,12 +75,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
 
     // This function might be called from any thread so we have to be cautious and use the
     // thread-safe version of ScheduleEvent.
-    CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds),
-                                        kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
+        Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
+        callback_handle);
 }
 
 void Thread::CancelWakeupTimer() {
-    CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
+        kernel.ThreadWakeupCallbackEventType(), callback_handle);
 }
 
 static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -115,6 +107,7 @@ void Thread::ResumeFromWait() {
     case ThreadStatus::WaitSleep:
     case ThreadStatus::WaitIPC:
     case ThreadStatus::WaitMutex:
+    case ThreadStatus::WaitCondVar:
     case ThreadStatus::WaitArb:
         break;
 
@@ -181,14 +174,13 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
         return ERR_INVALID_PROCESSOR_ID;
     }
 
-    // TODO(yuriks): Other checks, returning 0xD9001BEA
-
     if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
         LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
         // TODO (bunnei): Find the correct error code to use here
         return ResultCode(-1);
     }
 
+    auto& system = Core::System::GetInstance();
     SharedPtr<Thread> thread(new Thread(kernel));
 
     thread->thread_id = kernel.CreateNewThreadID();
@@ -197,7 +189,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->stack_top = stack_top;
     thread->tpidr_el0 = 0;
     thread->nominal_priority = thread->current_priority = priority;
-    thread->last_running_ticks = CoreTiming::GetTicks();
+    thread->last_running_ticks = system.CoreTiming().GetTicks();
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask = 1ULL << processor_id;
@@ -208,10 +200,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->name = std::move(name);
     thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
     thread->owner_process = &owner_process;
-    thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id);
-    thread->scheduler->AddThread(thread, priority);
+    thread->scheduler = &system.Scheduler(processor_id);
+    thread->scheduler->AddThread(thread);
     thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
 
+    thread->owner_process->RegisterThread(thread.get());
+
     // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
     // to initialize the context
     ResetThreadContext(thread->context, stack_top, entry_point, arg);
@@ -239,16 +233,16 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
     context.cpu_registers[1] = output;
 }
 
-s32 Thread::GetWaitObjectIndex(WaitObject* object) const {
+s32 Thread::GetWaitObjectIndex(const WaitObject* object) const {
     ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
-    auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
+    const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
     return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
 }
 
 VAddr Thread::GetCommandBufferAddress() const {
     // Offset from the start of TLS at which the IPC command buffer begins.
-    static constexpr int CommandHeaderOffset = 0x80;
-    return GetTLSAddress() + CommandHeaderOffset;
+    constexpr u64 command_header_offset = 0x80;
+    return GetTLSAddress() + command_header_offset;
 }
 
 void Thread::SetStatus(ThreadStatus new_status) {
@@ -257,7 +251,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
     }
 
     if (status == ThreadStatus::Running) {
-        last_running_ticks = CoreTiming::GetTicks();
+        last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
     }
 
     status = new_status;
@@ -267,8 +261,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
     if (thread->lock_owner == this) {
         // If the thread is already waiting for this thread to release the mutex, ensure that the
         // waiters list is consistent and return without doing anything.
-        auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-        ASSERT(itr != wait_mutex_threads.end());
+        const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+        ASSERT(iter != wait_mutex_threads.end());
         return;
     }
 
@@ -276,11 +270,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
     ASSERT(thread->lock_owner == nullptr);
 
     // Ensure that the thread is not already in the list of mutex waiters
-    auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(itr == wait_mutex_threads.end());
-
+    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+    ASSERT(iter == wait_mutex_threads.end());
+
+    // Keep the list sorted by ascending priority value; equal priorities stay in FIFO order
+    const auto insertion_point = std::find_if(
+        wait_mutex_threads.begin(), wait_mutex_threads.end(),
+        [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
+    wait_mutex_threads.insert(insertion_point, thread);
     thread->lock_owner = this;
-    wait_mutex_threads.emplace_back(std::move(thread));
+
     UpdatePriority();
 }
 
@@ -288,32 +287,44 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
     ASSERT(thread->lock_owner == this);
 
     // Ensure that the thread is in the list of mutex waiters
-    auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(itr != wait_mutex_threads.end());
+    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+    ASSERT(iter != wait_mutex_threads.end());
+
+    wait_mutex_threads.erase(iter);
 
-    boost::remove_erase(wait_mutex_threads, thread);
     thread->lock_owner = nullptr;
     UpdatePriority();
 }
 
 void Thread::UpdatePriority() {
-    // Find the highest priority among all the threads that are waiting for this thread's lock
+    // If any of the threads waiting on the mutex have a higher priority
+    // (taking into account priority inheritance), then this thread inherits
+    // that thread's priority.
     u32 new_priority = nominal_priority;
-    for (const auto& thread : wait_mutex_threads) {
-        if (thread->nominal_priority < new_priority)
-            new_priority = thread->nominal_priority;
+    if (!wait_mutex_threads.empty()) {
+        if (wait_mutex_threads.front()->current_priority < new_priority) {
+            new_priority = wait_mutex_threads.front()->current_priority;
+        }
     }
 
-    if (new_priority == current_priority)
+    if (new_priority == current_priority) {
         return;
+    }
 
     scheduler->SetThreadPriority(this, new_priority);
-
     current_priority = new_priority;
 
+    if (!lock_owner) {
+        return;
+    }
+
+    // Ensure that the thread is at the correct position in its lock owner's waiting list.
+    auto old_owner = lock_owner;
+    lock_owner->RemoveMutexWaiter(this);
+    old_owner->AddMutexWaiter(this);
+
     // Recursively update the priority of the thread that depends on the priority of this one.
-    if (lock_owner)
-        lock_owner->UpdatePriority();
+    lock_owner->UpdatePriority();
 }
 
 void Thread::ChangeCore(u32 core, u64 mask) {
@@ -345,7 +356,7 @@ void Thread::ChangeScheduler() {
     if (*new_processor_id != processor_id) {
         // Remove thread from previous core's scheduler
         scheduler->RemoveThread(this);
-        next_scheduler.AddThread(this, current_priority);
+        next_scheduler.AddThread(this);
     }
 
     processor_id = *new_processor_id;
@@ -360,7 +371,7 @@ void Thread::ChangeScheduler() {
     system.CpuCore(processor_id).PrepareReschedule();
 }
 
-bool Thread::AllWaitObjectsReady() {
+bool Thread::AllWaitObjectsReady() const {
     return std::none_of(
         wait_objects.begin(), wait_objects.end(),
         [this](const SharedPtr<WaitObject>& object) { return object->ShouldWait(this); });
@@ -389,6 +400,14 @@ void Thread::SetActivity(ThreadActivity value) {
     }
 }
 
+void Thread::Sleep(s64 nanoseconds) {
+    // Put this thread into the sleep-wait state
+    SetStatus(ThreadStatus::WaitSleep);
+
+    // Create an event to wake the thread up after the specified nanosecond delay has passed
+    WakeAfterDelay(nanoseconds);
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..83c83e45a 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -51,7 +51,8 @@ enum class ThreadStatus {
     WaitIPC,      ///< Waiting for the reply from an IPC request
     WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
     WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true
-    WaitMutex,    ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc
+    WaitMutex,    ///< Waiting due to an ArbitrateLock svc
+    WaitCondVar,  ///< Waiting due to a WaitProcessWideKey svc
     WaitArb,      ///< Waiting due to a SignalToAddress/WaitForAddress svc
     Dormant,      ///< Created but not yet made ready
     Dead          ///< Run to completion, or forcefully terminated
@@ -105,12 +106,12 @@ public:
         return "Thread";
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::Thread;
+    static constexpr HandleType HANDLE_TYPE = HandleType::Thread;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
     void Acquire(Thread* thread) override;
 
     /**
@@ -204,7 +205,7 @@ public:
      * object in the list.
      * @param object Object to query the index of.
      */
-    s32 GetWaitObjectIndex(WaitObject* object) const;
+    s32 GetWaitObjectIndex(const WaitObject* object) const;
 
     /**
      * Stops a thread, invalidating it from further use
@@ -298,7 +299,7 @@ public:
     }
 
     /// Determines whether all the objects this thread is waiting on are ready.
-    bool AllWaitObjectsReady();
+    bool AllWaitObjectsReady() const;
 
     const MutexWaitingThreads& GetMutexWaitingThreads() const {
         return wait_mutex_threads;
@@ -383,6 +384,9 @@ public:
 
     void SetActivity(ThreadActivity value);
 
+    /// Sleeps this thread for the given amount of nanoseconds.
+    void Sleep(s64 nanoseconds);
+
 private:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;
@@ -398,8 +402,14 @@ private:
     VAddr entry_point = 0;
     VAddr stack_top = 0;
 
-    u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
-    u32 current_priority = 0; ///< Current thread priority, can be temporarily changed
+    /// Nominal thread priority, as set by the emulated application.
+    /// The nominal priority is the thread priority without priority
+    /// inheritance taken into account.
+    u32 nominal_priority = 0;
+
+    /// Current thread priority. This may change over the course of the
+    /// thread's lifetime in order to facilitate priority inheritance.
+    u32 current_priority = 0;
 
     u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
     u64 last_running_ticks = 0;   ///< CPU tick when thread was last running
@@ -460,14 +470,4 @@ private:
  */
 Thread* GetCurrentThread();
 
-/**
- * Waits the current thread on a sleep
- */
-void WaitCurrentThread_Sleep();
-
-/**
- * Stops the current thread and removes it from the thread_list
- */
-void ExitCurrentThread();
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp
deleted file mode 100644
index 2c4f50e2b..000000000
--- a/src/core/hle/kernel/timer.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"
-#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/timer.h"
-
-namespace Kernel {
-
-Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {}
-Timer::~Timer() = default;
-
-SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) {
-    SharedPtr<Timer> timer(new Timer(kernel));
-
-    timer->reset_type = reset_type;
-    timer->signaled = false;
-    timer->name = std::move(name);
-    timer->initial_delay = 0;
-    timer->interval_delay = 0;
-    timer->callback_handle = kernel.CreateTimerCallbackHandle(timer).Unwrap();
-
-    return timer;
-}
-
-bool Timer::ShouldWait(Thread* thread) const {
-    return !signaled;
-}
-
-void Timer::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
-
-    if (reset_type == ResetType::OneShot)
-        signaled = false;
-}
-
-void Timer::Set(s64 initial, s64 interval) {
-    // Ensure we get rid of any previous scheduled event
-    Cancel();
-
-    initial_delay = initial;
-    interval_delay = interval;
-
-    if (initial == 0) {
-        // Immediately invoke the callback
-        Signal(0);
-    } else {
-        CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(),
-                                  callback_handle);
-    }
-}
-
-void Timer::Cancel() {
-    CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle);
-}
-
-void Timer::Clear() {
-    signaled = false;
-}
-
-void Timer::WakeupAllWaitingThreads() {
-    WaitObject::WakeupAllWaitingThreads();
-}
-
-void Timer::Signal(int cycles_late) {
-    LOG_TRACE(Kernel, "Timer {} fired", GetObjectId());
-
-    signaled = true;
-
-    // Resume all waiting threads
-    WakeupAllWaitingThreads();
-
-    if (interval_delay != 0) {
-        // Reschedule the timer with the interval delay
-        CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(interval_delay) - cycles_late,
-                                  kernel.TimerCallbackEventType(), callback_handle);
-    }
-}
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/timer.h b/src/core/hle/kernel/timer.h
deleted file mode 100644
index 12915c1b1..000000000
--- a/src/core/hle/kernel/timer.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
-
-namespace Kernel {
-
-class KernelCore;
-
-class Timer final : public WaitObject {
-public:
-    /**
-     * Creates a timer
-     * @param kernel The kernel instance to create the timer callback handle for.
-     * @param reset_type ResetType describing how to create the timer
-     * @param name Optional name of timer
-     * @return The created Timer
-     */
-    static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type,
-                                   std::string name = "Unknown");
-
-    std::string GetTypeName() const override {
-        return "Timer";
-    }
-    std::string GetName() const override {
-        return name;
-    }
-
-    static const HandleType HANDLE_TYPE = HandleType::Timer;
-    HandleType GetHandleType() const override {
-        return HANDLE_TYPE;
-    }
-
-    ResetType GetResetType() const {
-        return reset_type;
-    }
-
-    u64 GetInitialDelay() const {
-        return initial_delay;
-    }
-
-    u64 GetIntervalDelay() const {
-        return interval_delay;
-    }
-
-    bool ShouldWait(Thread* thread) const override;
-    void Acquire(Thread* thread) override;
-
-    void WakeupAllWaitingThreads() override;
-
-    /**
-     * Starts the timer, with the specified initial delay and interval.
-     * @param initial Delay until the timer is first fired
-     * @param interval Delay until the timer is fired after the first time
-     */
-    void Set(s64 initial, s64 interval);
-
-    void Cancel();
-    void Clear();
-
-    /**
-     * Signals the timer, waking up any waiting threads and rescheduling it
-     * for the next interval.
-     * This method should not be called from outside the timer callback handler,
-     * lest multiple callback events get scheduled.
-     */
-    void Signal(int cycles_late);
-
-private:
-    explicit Timer(KernelCore& kernel);
-    ~Timer() override;
-
-    ResetType reset_type; ///< The ResetType of this timer
-
-    u64 initial_delay;  ///< The delay until the timer fires for the first time
-    u64 interval_delay; ///< The delay until the timer fires after the first time
-
-    bool signaled;    ///< Whether the timer has been signaled or not
-    std::string name; ///< Name of timer (optional)
-
-    /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
-    Handle callback_handle;
-};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
new file mode 100644
index 000000000..26c4e5e67
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -0,0 +1,81 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/transfer_memory.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+// The constructor is private (see the header); Create() below is the factory that uses it.
+TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
+TransferMemory::~TransferMemory() = default;
+
+// Records the range and permissions for a new instance; no memory is mapped at this point.
+SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address, u64 size,
+                                                 MemoryPermission permissions) {
+    SharedPtr<TransferMemory> instance{new TransferMemory(kernel)};
+    instance->owner_process = kernel.CurrentProcess();
+    instance->owner_permissions = permissions;
+    instance->memory_size = size;
+    instance->base_address = base_address;
+    return instance;
+}
+
+const u8* TransferMemory::GetPointer() const {
+    return backing_block ? backing_block->data() : nullptr; // null until MapMemory() allocates
+}
+
+u64 TransferMemory::GetSize() const {
+    return memory_size; // Size in bytes recorded by Create(); does not depend on mapping state.
+}
+
+// Allocates a zeroed block and maps it at `address` in the owner process. On any error
+// (size/permission mismatch, already mapped, mapping failure) this object is left unchanged.
+ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission permissions) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+    if (owner_permissions != permissions || is_mapped) {
+        return ERR_INVALID_STATE;
+    }
+
+    const auto map_state = owner_permissions == MemoryPermission::None
+                               ? MemoryState::TransferMemoryIsolated
+                               : MemoryState::TransferMemory;
+
+    // Keep the new block local until the mapping succeeds so that a failed
+    // map never leaves backing_block pointing at memory the guest cannot see.
+    auto new_block = std::make_shared<std::vector<u8>>(size);
+    auto& vm_manager = owner_process->VMManager();
+    const auto map_result = vm_manager.MapMemoryBlock(address, new_block, 0, size, map_state);
+    if (map_result.Failed()) {
+        return map_result.Code();
+    }
+
+    backing_block = new_block;
+    is_mapped = true;
+    return RESULT_SUCCESS;
+}
+
+// Unmaps the range mapped by MapMemory(); fails if the size differs or nothing is mapped.
+ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+    if (!is_mapped) {
+        return ERR_INVALID_STATE;
+    }
+    const auto result = owner_process->VMManager().UnmapRange(address, size);
+    if (result.IsError()) {
+        return result;
+    }
+    is_mapped = false;
+    return RESULT_SUCCESS;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
new file mode 100644
index 000000000..a140b1e2b
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -0,0 +1,103 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+
+union ResultCode;
+
+namespace Kernel {
+
+class KernelCore;
+class Process;
+
+enum class MemoryPermission : u32;
+
+/// Defines the interface for transfer memory objects.
+///
+/// Transfer memory is typically used for the purpose of
+/// transferring memory between separate process instances,
+/// thus the name.
+///
+class TransferMemory final : public Object {
+public:
+    static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
+
+    static SharedPtr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size,
+                                            MemoryPermission permissions);
+
+    TransferMemory(const TransferMemory&) = delete;
+    TransferMemory& operator=(const TransferMemory&) = delete;
+
+    TransferMemory(TransferMemory&&) = delete;
+    TransferMemory& operator=(TransferMemory&&) = delete;
+
+    std::string GetTypeName() const override {
+        return "TransferMemory";
+    }
+
+    std::string GetName() const override {
+        return GetTypeName();
+    }
+
+    HandleType GetHandleType() const override {
+        return HANDLE_TYPE;
+    }
+
+    /// Gets a pointer to the backing block, which is only allocated by MapMemory().
+    const u8* GetPointer() const;
+
+    /// Gets the size of the memory backing this instance in bytes.
+    u64 GetSize() const;
+
+    /// Attempts to map transfer memory with the given range and memory permissions.
+    ///
+    /// @param address     The base address to begin mapping memory at.
+    /// @param size        The size of the memory to map, in bytes.
+    /// @param permissions The memory permissions to check against when mapping memory.
+    ///
+    /// @pre The given size and memory permissions must match the values given
+    ///      when creating the transfer memory instance; otherwise an error code
+    ///      is returned. NOTE(review): the address is not compared to base_address.
+    ///
+    ResultCode MapMemory(VAddr address, u64 size, MemoryPermission permissions);
+
+    /// Unmaps the transfer memory with the given range.
+    ///
+    /// @param address The base address to begin unmapping memory at.
+    /// @param size    The size of the memory to unmap, in bytes.
+    ///
+    /// @pre The given size must be the same as the one used to create the
+    ///      transfer memory instance. NOTE(review): the address is not validated.
+    ///
+    ResultCode UnmapMemory(VAddr address, u64 size);
+
+private:
+    explicit TransferMemory(KernelCore& kernel);
+    ~TransferMemory() override;
+
+    /// Memory block backing this instance. Null until MapMemory() allocates it.
+    std::shared_ptr<std::vector<u8>> backing_block;
+
+    /// The base address for the memory managed by this instance.
+    VAddr base_address = 0;
+
+    /// Size of the memory, in bytes, that this instance manages.
+    u64 memory_size = 0;
+
+    /// The memory permissions that are applied to this instance.
+    MemoryPermission owner_permissions{};
+
+    /// The process that this transfer memory instance was created under.
+    Process* owner_process = nullptr;
+
+    /// Whether or not this transfer memory instance has mapped memory.
+    bool is_mapped = false;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 10ad94aa6..ec0a480ce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,34 +7,42 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/memory_hook.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 #include "core/memory_setup.h"
 
 namespace Kernel {
-
-static const char* GetMemoryStateName(MemoryState state) {
+namespace {
+const char* GetMemoryStateName(MemoryState state) {
     static constexpr const char* names[] = {
-        "Unmapped",         "Io",
-        "Normal",           "CodeStatic",
-        "CodeMutable",      "Heap",
-        "Shared",           "Unknown1",
-        "ModuleCodeStatic", "ModuleCodeMutable",
-        "IpcBuffer0",       "Stack",
-        "ThreadLocal",      "TransferMemoryIsolated",
-        "TransferMemory",   "ProcessMemory",
-        "Inaccessible",     "IpcBuffer1",
-        "IpcBuffer3",       "KernelStack",
+        "Unmapped",       "Io",
+        "Normal",         "Code",
+        "CodeData",       "Heap",
+        "Shared",         "Unknown1",
+        "ModuleCode",     "ModuleCodeData",
+        "IpcBuffer0",     "Stack",
+        "ThreadLocal",    "TransferMemoryIsolated",
+        "TransferMemory", "ProcessMemory",
+        "Inaccessible",   "IpcBuffer1",
+        "IpcBuffer3",     "KernelStack",
     };
 
     return names[ToSvcMemoryState(state)];
 }
 
+// Checks if a given address range lies within a larger one; safe against address + size overflow.
+constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
+                                    VAddr address_range_end) {
+    const bool starts_inside = address_range_begin <= address && address <= address_range_end;
+    return starts_inside && size <= address_range_end - address;
+}
+} // Anonymous namespace
+
 bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
     ASSERT(base + size == next.base);
     if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -169,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
                                                    MemoryState state,
-                                                   Memory::MemoryHookPointer mmio_handler) {
+                                                   Common::MemoryHookPointer mmio_handler) {
     // This is the appropriately sized VMA that will turn into our allocation.
     CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
     VirtualMemoryArea& final_vma = vma_handle->second;
@@ -248,59 +256,50 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
     return RESULT_SUCCESS;
 }
 
-ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
-    if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
-        target + size < target) {
-        return ERR_INVALID_ADDRESS;
+ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
+    if (size > GetHeapRegionSize()) {
+        return ERR_OUT_OF_MEMORY;
+    }
+
+    // No need to do any additional work if the heap is already the given size.
+    if (size == GetCurrentHeapSize()) {
+        return MakeResult(heap_region_base);
     }
 
     if (heap_memory == nullptr) {
         // Initialize heap
-        heap_memory = std::make_shared<std::vector<u8>>();
-        heap_start = heap_end = target;
+        heap_memory = std::make_shared<std::vector<u8>>(size);
+        heap_end = heap_region_base + size;
     } else {
-        UnmapRange(heap_start, heap_end - heap_start);
-    }
-
-    // If necessary, expand backing vector to cover new heap extents.
-    if (target < heap_start) {
-        heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
-        heap_start = target;
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
-    if (target + size > heap_end) {
-        heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
-        heap_end = target + size;
-        RefreshMemoryBlockMappings(heap_memory.get());
+        UnmapRange(heap_region_base, GetCurrentHeapSize());
     }
-    ASSERT(heap_end - heap_start == heap_memory->size());
 
-    CASCADE_RESULT(auto vma, MapMemoryBlock(target, heap_memory, target - heap_start, size,
-                                            MemoryState::Heap));
-    Reprotect(vma, perms);
+    // If necessary, expand backing vector to cover new heap extents in
+    // the case of allocating. Otherwise, shrink the backing memory,
+    // if a smaller heap has been requested.
+    const u64 old_heap_size = GetCurrentHeapSize();
+    if (size > old_heap_size) {
+        const u64 alloc_size = size - old_heap_size;
 
-    heap_used = size;
-
-    return MakeResult<VAddr>(heap_end - size);
-}
+        heap_memory->insert(heap_memory->end(), alloc_size, 0);
+        RefreshMemoryBlockMappings(heap_memory.get());
+    } else if (size < old_heap_size) {
+        heap_memory->resize(size);
+        heap_memory->shrink_to_fit();
 
-ResultCode VMManager::HeapFree(VAddr target, u64 size) {
-    if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
-        target + size < target) {
-        return ERR_INVALID_ADDRESS;
+        RefreshMemoryBlockMappings(heap_memory.get());
     }
 
-    if (size == 0) {
-        return RESULT_SUCCESS;
-    }
+    heap_end = heap_region_base + size;
+    ASSERT(GetCurrentHeapSize() == heap_memory->size());
 
-    const ResultCode result = UnmapRange(target, size);
-    if (result.IsError()) {
-        return result;
+    const auto mapping_result =
+        MapMemoryBlock(heap_region_base, heap_memory, 0, size, MemoryState::Heap);
+    if (mapping_result.Failed()) {
+        return mapping_result.Code();
     }
 
-    heap_used -= size;
-    return RESULT_SUCCESS;
+    return MakeResult<VAddr>(heap_region_base);
 }
 
 MemoryInfo VMManager::QueryMemory(VAddr address) const {
@@ -592,6 +591,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
 
     heap_region_base = map_region_end;
     heap_region_end = heap_region_base + heap_region_size;
+    heap_end = heap_region_base;
 
     new_map_region_base = heap_region_end;
     new_map_region_end = new_map_region_base + new_map_region_size;
@@ -618,7 +618,7 @@ void VMManager::ClearPageTable() {
     std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
     page_table.special_regions.clear();
     std::fill(page_table.attributes.begin(), page_table.attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 }
 
 VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -686,10 +686,6 @@ u64 VMManager::GetTotalMemoryUsage() const {
     return 0xF8000000;
 }
 
-u64 VMManager::GetTotalHeapUsage() const {
-    return heap_used;
-}
-
 VAddr VMManager::GetAddressSpaceBaseAddress() const {
     return address_space_base;
 }
@@ -706,6 +702,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
     return address_space_width;
 }
 
+bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
+    const VAddr space_begin = GetAddressSpaceBaseAddress();
+    return IsInsideAddressRange(address, size, space_begin, GetAddressSpaceEndAddress());
+}
+
 VAddr VMManager::GetASLRRegionBaseAddress() const {
     return aslr_region_base;
 }
@@ -750,6 +751,11 @@ u64 VMManager::GetCodeRegionSize() const {
     return code_region_end - code_region_base;
 }
 
+bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
+    const VAddr region_begin = GetCodeRegionBaseAddress();
+    return IsInsideAddressRange(address, size, region_begin, GetCodeRegionEndAddress());
+}
+
 VAddr VMManager::GetHeapRegionBaseAddress() const {
     return heap_region_base;
 }
@@ -762,6 +768,15 @@ u64 VMManager::GetHeapRegionSize() const {
     return heap_region_end - heap_region_base;
 }
 
+u64 VMManager::GetCurrentHeapSize() const {
+    return heap_end - heap_region_base; // heap_end is an exclusive end, so this is the byte count
+}
+
+bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
+    const VAddr region_begin = GetHeapRegionBaseAddress();
+    return IsInsideAddressRange(address, size, region_begin, GetHeapRegionEndAddress());
+}
+
 VAddr VMManager::GetMapRegionBaseAddress() const {
     return map_region_base;
 }
@@ -774,6 +789,10 @@ u64 VMManager::GetMapRegionSize() const {
     return map_region_end - map_region_base;
 }
 
+bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { // map-region bounds check
+    return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
+}
+
 VAddr VMManager::GetNewMapRegionBaseAddress() const {
     return new_map_region_base;
 }
@@ -786,6 +805,11 @@ u64 VMManager::GetNewMapRegionSize() const {
     return new_map_region_end - new_map_region_base;
 }
 
+bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
+    const VAddr region_begin = GetNewMapRegionBaseAddress();
+    return IsInsideAddressRange(address, size, region_begin, GetNewMapRegionEndAddress());
+}
+
 VAddr VMManager::GetTLSIORegionBaseAddress() const {
     return tls_io_region_base;
 }
@@ -798,4 +822,9 @@ u64 VMManager::GetTLSIORegionSize() const {
     return tls_io_region_end - tls_io_region_base;
 }
 
+bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
+    const VAddr region_begin = GetTLSIORegionBaseAddress();
+    return IsInsideAddressRange(address, size, region_begin, GetTLSIORegionEndAddress());
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6091533bc..6f484b7bf 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
+#include "common/memory_hook.h"
+#include "common/page_table.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 
 namespace FileSys {
 enum class ProgramAddressSpaceType : u8;
@@ -164,12 +165,12 @@ enum class MemoryState : u32 {
     Unmapped               = 0x00,
     Io                     = 0x01 | FlagMapped,
     Normal                 = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed,
-    CodeStatic             = 0x03 | CodeFlags  | FlagMapProcess,
-    CodeMutable            = 0x04 | CodeFlags  | FlagMapProcess | FlagCodeMemory,
+    Code                   = 0x03 | CodeFlags  | FlagMapProcess,
+    CodeData               = 0x04 | DataFlags  | FlagMapProcess | FlagCodeMemory,
     Heap                   = 0x05 | DataFlags  | FlagCodeMemory,
     Shared                 = 0x06 | FlagMapped | FlagMemoryPoolAllocated,
-    ModuleCodeStatic       = 0x08 | CodeFlags  | FlagModule | FlagMapProcess,
-    ModuleCodeMutable      = 0x09 | DataFlags  | FlagModule | FlagMapProcess | FlagCodeMemory,
+    ModuleCode             = 0x08 | CodeFlags  | FlagModule | FlagMapProcess,
+    ModuleCodeData         = 0x09 | DataFlags  | FlagModule | FlagMapProcess | FlagCodeMemory,
 
     IpcBuffer0             = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated |
                                     IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned,
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
     // Settings for type = MMIO
     /// Physical address of the register area this VMA maps to.
     PAddr paddr = 0;
-    Memory::MemoryHookPointer mmio_handler = nullptr;
+    Common::MemoryHookPointer mmio_handler = nullptr;
 
     /// Tests if this area can be merged to the right with `next`.
     bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
      * @param mmio_handler The handler that will implement read and write for this MMIO region.
      */
     ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
-                                 Memory::MemoryHookPointer mmio_handler);
+                                 Common::MemoryHookPointer mmio_handler);
 
     /// Unmaps a range of addresses, splitting VMAs as necessary.
     ResultCode UnmapRange(VAddr target, u64 size);
@@ -379,11 +380,41 @@ public:
     /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
     ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms);
 
-    ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
-    ResultCode HeapFree(VAddr target, u64 size);
-
     ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state);
 
+    /// Attempts to allocate a heap with the given size.
+    ///
+    /// @param size The size of the heap to allocate in bytes.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size that is equal to the size of the current heap,
+    ///       then this function will do nothing and return the current
+    ///       heap's starting address, as there's no need to perform
+    ///       any additional heap allocation work.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size less than the current heap's size, then
+    ///       this function will attempt to shrink the heap.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size larger than the current heap's size, then
+    ///       this function will attempt to extend the size of the heap.
+    ///
+    /// @returns A result indicating either success or failure.
+    ///          <p>
+    ///          If successful, this function will return a result
+    ///          containing the starting address to the allocated heap.
+    ///          <p>
+    ///          If unsuccessful, this function will return a result
+    ///          containing an error code.
+    ///
+    /// @pre The given size must lie within the allowable heap
+    ///      memory region managed by this VMManager instance.
+    ///      Failure to abide by this will result in ERR_OUT_OF_MEMORY
+    ///      being returned as the result.
+    ///
+    ResultVal<VAddr> SetHeapSize(u64 size);
+
     /// Queries the memory manager for information about the given address.
     ///
     /// @param address The address to query the memory manager about for information.
@@ -417,9 +448,6 @@ public:
     /// Gets the total memory usage, used by svcGetInfo
     u64 GetTotalMemoryUsage() const;
 
-    /// Gets the total heap usage, used by svcGetInfo
-    u64 GetTotalHeapUsage() const;
-
     /// Gets the address space base address
     VAddr GetAddressSpaceBaseAddress() const;
 
@@ -432,18 +460,21 @@ public:
     /// Gets the address space width in bits.
     u64 GetAddressSpaceWidth() const;
 
+    /// Determines whether or not the given address range lies within the address space.
+    bool IsWithinAddressSpace(VAddr address, u64 size) const;
+
     /// Gets the base address of the ASLR region.
     VAddr GetASLRRegionBaseAddress() const;
 
     /// Gets the end address of the ASLR region.
     VAddr GetASLRRegionEndAddress() const;
 
-    /// Determines whether or not the specified address range is within the ASLR region.
-    bool IsWithinASLRRegion(VAddr address, u64 size) const;
-
     /// Gets the size of the ASLR region
     u64 GetASLRRegionSize() const;
 
+    /// Determines whether or not the specified address range is within the ASLR region.
+    bool IsWithinASLRRegion(VAddr address, u64 size) const;
+
     /// Gets the base address of the code region.
     VAddr GetCodeRegionBaseAddress() const;
 
@@ -453,6 +484,9 @@ public:
     /// Gets the total size of the code region in bytes.
     u64 GetCodeRegionSize() const;
 
+    /// Determines whether or not the specified range is within the code region.
+    bool IsWithinCodeRegion(VAddr address, u64 size) const;
+
     /// Gets the base address of the heap region.
     VAddr GetHeapRegionBaseAddress() const;
 
@@ -462,6 +496,16 @@ public:
     /// Gets the total size of the heap region in bytes.
     u64 GetHeapRegionSize() const;
 
+    /// Gets the total size of the current heap in bytes.
+    ///
+    /// @note This is the current allocated heap size, not the size
+    ///       of the region it's allowed to exist within.
+    ///
+    u64 GetCurrentHeapSize() const;
+
+    /// Determines whether or not the specified range is within the heap region.
+    bool IsWithinHeapRegion(VAddr address, u64 size) const;
+
     /// Gets the base address of the map region.
     VAddr GetMapRegionBaseAddress() const;
 
@@ -471,6 +515,9 @@ public:
     /// Gets the total size of the map region in bytes.
     u64 GetMapRegionSize() const;
 
+    /// Determines whether or not the specified range is within the map region.
+    bool IsWithinMapRegion(VAddr address, u64 size) const;
+
     /// Gets the base address of the new map region.
     VAddr GetNewMapRegionBaseAddress() const;
 
@@ -480,6 +527,9 @@ public:
     /// Gets the total size of the new map region in bytes.
     u64 GetNewMapRegionSize() const;
 
+    /// Determines whether or not the given address range is within the new map region
+    bool IsWithinNewMapRegion(VAddr address, u64 size) const;
+
     /// Gets the base address of the TLS IO region.
     VAddr GetTLSIORegionBaseAddress() const;
 
@@ -489,9 +539,12 @@ public:
     /// Gets the total size of the TLS IO region in bytes.
     u64 GetTLSIORegionSize() const;
 
+    /// Determines if the given address range is within the TLS IO region.
+    bool IsWithinTLSIORegion(VAddr address, u64 size) const;
+
     /// Each VMManager has its own page table, which is set as the main one when the owning process
     /// is scheduled.
-    Memory::PageTable page_table;
+    Common::PageTable page_table{Memory::PAGE_BITS};
 
 private:
     using VMAIter = VMAMap::iterator;
@@ -606,9 +659,9 @@ private:
     // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
     // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
     std::shared_ptr<std::vector<u8>> heap_memory;
-    // The left/right bounds of the address space covered by heap_memory.
-    VAddr heap_start = 0;
+
+    // The end of the currently allocated heap. This is not an inclusive
+    // end of the range. This is essentially 'base_address + current_size'.
     VAddr heap_end = 0;
-    u64 heap_used = 0;
 };
 } // namespace Kernel
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h
index d70b67893..04464a51a 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -24,7 +24,7 @@ public:
      * @param thread The thread about which we're deciding.
      * @return True if the current thread should wait due to this object being unavailable
      */
-    virtual bool ShouldWait(Thread* thread) const = 0;
+    virtual bool ShouldWait(const Thread* thread) const = 0;
 
     /// Acquire/lock the object for the specified thread if it is available
     virtual void Acquire(Thread* thread) = 0;
@@ -33,19 +33,19 @@ public:
      * Add a thread to wait on this object
      * @param thread Pointer to thread to add
      */
-    virtual void AddWaitingThread(SharedPtr<Thread> thread);
+    void AddWaitingThread(SharedPtr<Thread> thread);
 
     /**
      * Removes a thread from waiting on this object (e.g. if it was resumed already)
      * @param thread Pointer to thread to remove
      */
-    virtual void RemoveWaitingThread(Thread* thread);
+    void RemoveWaitingThread(Thread* thread);
 
     /**
      * Wake up all threads waiting on this object that can be awoken, in priority order,
      * and set the synchronization result and output of the thread.
      */
-    virtual void WakeupAllWaitingThreads();
+    void WakeupAllWaitingThreads();
 
     /**
      * Wakes up a single thread waiting on this object.
diff --git a/src/core/hle/kernel/writable_event.h b/src/core/hle/kernel/writable_event.h
index c9068dd3d..d00c92a6b 100644
--- a/src/core/hle/kernel/writable_event.h
+++ b/src/core/hle/kernel/writable_event.h
@@ -37,7 +37,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::WritableEvent;
+    static constexpr HandleType HANDLE_TYPE = HandleType::WritableEvent;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index bfb77cc31..8a3701151 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,20 +8,11 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 
 // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
 
 /**
- * Detailed description of the error. Code 0 always means success.
- */
-enum class ErrorDescription : u32 {
-    Success = 0,
-    RemoteProcessDead = 301,
-};
-
-/**
  * Identifies the module which caused the error. Error codes can be propagated through a call
  * chain, meaning that this doesn't always correspond to the module where the API call made is
  * contained.
@@ -121,30 +112,18 @@ enum class ErrorModule : u32 {
     ShopN = 811,
 };
 
-/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields.
+/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
 union ResultCode {
     u32 raw;
 
     BitField<0, 9, ErrorModule> module;
     BitField<9, 13, u32> description;
 
-    // The last bit of `level` is checked by apps and the kernel to determine if a result code is an
-    // error
-    BitField<31, 1, u32> is_error;
-
     constexpr explicit ResultCode(u32 raw) : raw(raw) {}
 
-    constexpr ResultCode(ErrorModule module, ErrorDescription description)
-        : ResultCode(module, static_cast<u32>(description)) {}
-
     constexpr ResultCode(ErrorModule module_, u32 description_)
         : raw(module.FormatValue(module_) | description.FormatValue(description_)) {}
 
-    constexpr ResultCode& operator=(const ResultCode& o) {
-        raw = o.raw;
-        return *this;
-    }
-
     constexpr bool IsSuccess() const {
         return raw == 0;
     }
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index d1cbe0e44..85271d418 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <array>
 #include <cinttypes>
 #include <cstring>
-#include <stack>
 #include "audio_core/audio_renderer.h"
 #include "core/core.h"
 #include "core/file_sys/savedata_factory.h"
@@ -13,7 +13,7 @@
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/acc/profile_manager.h"
 #include "core/hle/service/am/am.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
 }
 
 IAudioController::IAudioController() : ServiceFramework("IAudioController") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
-        {1, &IAudioController::GetMainAppletExpectedMasterVolume,
-         "GetMainAppletExpectedMasterVolume"},
-        {2, &IAudioController::GetLibraryAppletExpectedMasterVolume,
-         "GetLibraryAppletExpectedMasterVolume"},
-        {3, nullptr, "ChangeMainAppletMasterVolume"},
-        {4, nullptr, "SetTransparentVolumeRate"},
+        {1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
+        {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
+        {3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
+        {4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
 IAudioController::~IAudioController() = default;
 
 void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    IPC::RequestParser rp{ctx};
+    const float main_applet_volume_tmp = rp.Pop<float>();
+    const float library_applet_volume_tmp = rp.Pop<float>();
+
+    LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
+              main_applet_volume_tmp, library_applet_volume_tmp);
+
+    // Ensure the volume values remain within the 0-100% range
+    main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
+    library_applet_volume =
+        std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
+
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
 void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
-    rb.Push(volume);
+    rb.Push(main_applet_volume);
 }
 
 void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
-    rb.Push(volume);
+    rb.Push(library_applet_volume);
+}
+
+// Handles command 3: stores the clamped main applet volume and the requested fade time.
+void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
+    struct Request {
+        float volume;
+        s64 fade_time_ns;
+    };
+    static_assert(sizeof(Request) == 16);
+
+    IPC::RequestParser parser{ctx};
+    const auto request = parser.PopRaw<Request>();
+    LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", request.volume,
+              request.fade_time_ns);
+
+    fade_time_ns = std::chrono::nanoseconds{request.fade_time_ns};
+    main_applet_volume = std::clamp(request.volume, min_allowed_volume, max_allowed_volume);
+
+    IPC::ResponseBuilder response{ctx, 2};
+    response.Push(RESULT_SUCCESS);
+}
+
+// Handles command 4 (registered as "SetTransparentVolumeRate"): stores the clamped rate.
+void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser parser{ctx};
+    const auto requested_rate = parser.Pop<float>();
+
+    LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", requested_rate);
+
+    // Keep the stored rate inside [min_allowed_volume, max_allowed_volume].
+    transparent_volume_rate = std::clamp(requested_rate, min_allowed_volume, max_allowed_volume);
+
+    IPC::ResponseBuilder response{ctx, 2};
+    response.Push(RESULT_SUCCESS);
 }
 
 IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
@@ -169,7 +215,21 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
 
 IDisplayController::~IDisplayController() = default;
 
-IDebugFunctions::IDebugFunctions() : ServiceFramework("IDebugFunctions") {}
+IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
+    // clang-format off
+    static const FunctionInfo functions[] = {
+        {0, nullptr, "NotifyMessageToHomeMenuForDebug"},
+        {1, nullptr, "OpenMainApplication"},
+        {10, nullptr, "EmulateButtonEvent"},
+        {20, nullptr, "InvalidateTransitionLayer"},
+        {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
+        {40, nullptr, "GetAppletResourceUsageInfo"},
+    };
+    // clang-format on
+
+    RegisterHandlers(functions);
+}
+
 IDebugFunctions::~IDebugFunctions() = default;
 
 ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
@@ -179,8 +239,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
         {0, nullptr, "Exit"},
         {1, &ISelfController::LockExit, "LockExit"},
         {2, &ISelfController::UnlockExit, "UnlockExit"},
-        {3, nullptr, "EnterFatalSection"},
-        {4, nullptr, "LeaveFatalSection"},
+        {3, &ISelfController::EnterFatalSection, "EnterFatalSection"},
+        {4, &ISelfController::LeaveFatalSection, "LeaveFatalSection"},
         {9, &ISelfController::GetLibraryAppletLaunchableEvent, "GetLibraryAppletLaunchableEvent"},
         {10, &ISelfController::SetScreenShotPermission, "SetScreenShotPermission"},
         {11, &ISelfController::SetOperationModeChangedNotification, "SetOperationModeChangedNotification"},
@@ -225,41 +285,54 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
 
 ISelfController::~ISelfController() = default;
 
-void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
-    // Takes 3 input u8s with each field located immediately after the previous
-    // u8, these are bool flags. No output.
+void ISelfController::LockExit(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
 
-    IPC::RequestParser rp{ctx};
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
 
-    struct FocusHandlingModeParams {
-        u8 unknown0;
-        u8 unknown1;
-        u8 unknown2;
-    };
-    auto flags = rp.PopRaw<FocusHandlingModeParams>();
+void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AM, "(STUBBED) called");
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+void ISelfController::EnterFatalSection(Kernel::HLERequestContext& ctx) {
+    ++num_fatal_sections_entered;
+    LOG_DEBUG(Service_AM, "called. Num fatal sections entered: {}", num_fatal_sections_entered);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
+void ISelfController::LeaveFatalSection(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_AM, "called.");
 
-    bool flag = rp.Pop<bool>();
-    LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag);
+    // Entry and exit of fatal sections must be balanced.
+    if (num_fatal_sections_entered == 0) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ResultCode{ErrorModule::AM, 512});
+        return;
+    }
+
+    --num_fatal_sections_entered;
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
+void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AM, "(STUBBED) called");
+
+    launchable_event.writable->Signal();
+
+    IPC::ResponseBuilder rb{ctx, 2, 1};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushCopyObjects(launchable_event.readable);
+}
+
 void ISelfController::SetScreenShotPermission(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
 
@@ -277,40 +350,52 @@ void ISelfController::SetOperationModeChangedNotification(Kernel::HLERequestCont
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) {
-    // Takes 3 input u8s with each field located immediately after the previous
-    // u8, these are bool flags. No output.
+void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
-    bool enabled = rp.Pop<bool>();
-    LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled);
+    bool flag = rp.Pop<bool>();
+    LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::LockExit(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
+    // Takes 3 input u8s with each field located immediately after the previous
+    // u8, these are bool flags. No output.
+    IPC::RequestParser rp{ctx};
+
+    struct FocusHandlingModeParams {
+        u8 unknown0;
+        u8 unknown1;
+        u8 unknown2;
+    };
+    const auto flags = rp.PopRaw<FocusHandlingModeParams>();
+
+    LOG_WARNING(Service_AM, "(STUBBED) called. unknown0={}, unknown1={}, unknown2={}",
+                flags.unknown0, flags.unknown1, flags.unknown2);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) {
+void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) {
+    // Takes a single bool flag as input (popped below as `enabled`).
+    // No output.
+    IPC::RequestParser rp{ctx};
 
-    launchable_event.writable->Signal();
+    bool enabled = rp.Pop<bool>();
+    LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled);
 
-    IPC::ResponseBuilder rb{ctx, 2, 1};
+    IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
-    rb.PushCopyObjects(launchable_event.readable);
 }
 
 void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) {
@@ -322,14 +407,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c
 
 void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
+
     // TODO(Subv): Find out how AM determines the display to use, for now just
     // create the layer in the Default display.
-    u64 display_id = nvflinger->OpenDisplay("Default");
-    u64 layer_id = nvflinger->CreateLayer(display_id);
+    const auto display_id = nvflinger->OpenDisplay("Default");
+    const auto layer_id = nvflinger->CreateLayer(*display_id);
 
     IPC::ResponseBuilder rb{ctx, 4};
     rb.Push(RESULT_SUCCESS);
-    rb.Push(layer_id);
+    rb.Push(*layer_id);
 }
 
 void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {
@@ -846,19 +932,19 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
     rp.SetCurrentOffset(3);
     const auto handle{rp.Pop<Kernel::Handle>()};
 
-    const auto shared_mem =
-        Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::SharedMemory>(
+    const auto transfer_mem =
+        Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(
             handle);
 
-    if (shared_mem == nullptr) {
+    if (transfer_mem == nullptr) {
         LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle);
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ResultCode(-1));
         return;
     }
 
-    const u8* mem_begin = shared_mem->GetPointer();
-    const u8* mem_end = mem_begin + shared_mem->GetSize();
+    const u8* const mem_begin = transfer_mem->GetPointer();
+    const u8* const mem_end = mem_begin + transfer_mem->GetSize();
     std::vector<u8> memory{mem_begin, mem_end};
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index b6113cfdd..991b7d47c 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <chrono>
 #include <memory>
 #include <queue>
 #include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
     void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
     void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
     void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
+    void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
+    void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
 
-    u32 volume{100};
+    static constexpr float min_allowed_volume = 0.0f;
+    static constexpr float max_allowed_volume = 1.0f;
+
+    float main_applet_volume{0.25f};
+    float library_applet_volume{max_allowed_volume};
+    float transparent_volume_rate{min_allowed_volume};
+
+    // Volume transition fade time in nanoseconds.
+    // e.g. If the main applet volume was 0% and was changed to 50%
+    //      with a fade of 50ns, then over the course of 50ns,
+    //      the volume will gradually fade up to 50%
+    std::chrono::nanoseconds fade_time_ns{0};
 };
 
 class IDisplayController final : public ServiceFramework<IDisplayController> {
@@ -103,17 +117,19 @@ public:
     ~ISelfController() override;
 
 private:
-    void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
-    void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
-    void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
-    void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
-    void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
     void LockExit(Kernel::HLERequestContext& ctx);
     void UnlockExit(Kernel::HLERequestContext& ctx);
+    void EnterFatalSection(Kernel::HLERequestContext& ctx);
+    void LeaveFatalSection(Kernel::HLERequestContext& ctx);
     void GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx);
+    void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
+    void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
+    void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
+    void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
+    void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
+    void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
     void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx);
     void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
-    void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
     void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
     void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
     void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
@@ -121,6 +137,7 @@ private:
     std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
     Kernel::EventPair launchable_event;
     u32 idle_time_detection_extension = 0;
+    u64 num_fatal_sections_entered = 0;
 };
 
 class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp
index 41a573a91..b888f861d 100644
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -249,7 +249,8 @@ AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
         {300, nullptr, "OpenOverlayAppletProxy"},
         {350, nullptr, "OpenSystemApplicationProxy"},
         {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"},
-        {401, nullptr, "GetSystemAppletControllerForDebug"},
+        {410, nullptr, "GetSystemAppletControllerForDebug"},
+        {1000, nullptr, "GetDebugFunctions"},
     };
     // clang-format on
 
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index f255f74b5..8c5bd6059 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/frontend/applets/software_keyboard.h"
+#include "core/hle/result.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/software_keyboard.h"
 
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index efd5753a1..b93a30d28 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
 #include <vector>
 
 #include "common/common_funcs.h"
+#include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/applets.h"
 
+union ResultCode;
+
 namespace Service::AM::Applets {
 
 enum class KeysetDisable : u32 {
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index 9b0aa7f5f..7e17df98a 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -86,7 +86,7 @@ static FileSys::VirtualFile GetManualRomFS() {
     if (loader.ReadManualRomFS(out) == Loader::ResultStatus::Success)
         return out;
 
-    const auto& installed{FileSystem::GetUnionContents()};
+    const auto& installed{Core::System::GetInstance().GetContentProvider()};
     const auto res = installed.GetEntry(Core::System::GetInstance().CurrentProcess()->GetTitleID(),
                                         FileSys::ContentRecordType::Manual);
 
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index b506bc3dd..2d768d9fc 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -33,11 +33,11 @@ static bool CheckAOCTitleIDMatchesBase(u64 title_id, u64 base) {
 
 static std::vector<u64> AccumulateAOCTitleIDs() {
     std::vector<u64> add_on_content;
-    const auto rcu = FileSystem::GetUnionContents();
+    const auto& rcu = Core::System::GetInstance().GetContentProvider();
     const auto list =
         rcu.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data);
     std::transform(list.begin(), list.end(), std::back_inserter(add_on_content),
-                   [](const FileSys::RegisteredCacheEntry& rce) { return rce.title_id; });
+                   [](const FileSys::ContentProviderEntry& rce) { return rce.title_id; });
     add_on_content.erase(
         std::remove_if(
             add_on_content.begin(), add_on_content.end(),
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index 657010312..e5daefdde 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -2,9 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/logging/log.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/audin_u.h"
 
 namespace Service::Audio {
@@ -12,6 +9,7 @@ namespace Service::Audio {
 class IAudioIn final : public ServiceFramework<IAudioIn> {
 public:
     IAudioIn() : ServiceFramework("IAudioIn") {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "GetAudioInState"},
             {1, nullptr, "StartAudioIn"},
@@ -28,16 +26,23 @@ public:
             {12, nullptr, "SetAudioInDeviceGain"},
             {13, nullptr, "GetAudioInDeviceGain"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
-    ~IAudioIn() = default;
 };
 
 AudInU::AudInU() : ServiceFramework("audin:u") {
+    // clang-format off
     static const FunctionInfo functions[] = {
-        {0, nullptr, "ListAudioIns"},    {1, nullptr, "OpenAudioIn"},      {2, nullptr, "Unknown"},
-        {3, nullptr, "OpenAudioInAuto"}, {4, nullptr, "ListAudioInsAuto"},
+        {0, nullptr, "ListAudioIns"},
+        {1, nullptr, "OpenAudioIn"},
+        {2, nullptr, "Unknown"},
+        {3, nullptr, "OpenAudioInAuto"},
+        {4, nullptr, "ListAudioInsAuto"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index dc6a6b188..39acb7b23 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/audio/audout_u.h"
+#include "core/hle/service/audio/errors.h"
 #include "core/memory.h"
 
 namespace Service::Audio {
 
-namespace ErrCodes {
-enum {
-    ErrorUnknown = 2,
-    BufferCountExceeded = 8,
-};
-}
-
 constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
 constexpr int DefaultSampleRate{48000};
 
@@ -68,12 +62,12 @@ public:
         RegisterHandlers(functions);
 
         // This is the event handle used to check if the audio buffer was released
-        auto& kernel = Core::System::GetInstance().Kernel();
-        buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
-                                                              "IAudioOutBufferReleased");
+        auto& system = Core::System::GetInstance();
+        buffer_event = Kernel::WritableEvent::CreateEventPair(
+            system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
 
-        stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
-                                       std::move(unique_name),
+        stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
+                                       audio_params.channel_count, std::move(unique_name),
                                        [=]() { buffer_event.writable->Signal(); });
     }
 
@@ -100,7 +94,7 @@ private:
 
         if (stream->IsPlaying()) {
             IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown));
+            rb.Push(ERR_OPERATION_FAILED);
             return;
         }
 
@@ -113,7 +107,9 @@ private:
     void StopAudioOut(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Audio, "called");
 
-        audio_core.StopStream(stream);
+        if (stream->IsPlaying()) {
+            audio_core.StopStream(stream);
+        }
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
 
         if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
             IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded));
+            rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
+            return;
         }
 
         IPC::ResponseBuilder rb{ctx, 2};
@@ -153,7 +150,6 @@ private:
     void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Audio, "called {}", ctx.Description());
 
-        IPC::RequestParser rp{ctx};
         const u64 max_count{ctx.GetWriteBufferSize() / sizeof(u64)};
         const auto released_buffers{audio_core.GetTagsAndReleaseBuffers(stream, max_count)};
 
@@ -197,12 +193,9 @@ private:
 void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
-    IPC::RequestParser rp{ctx};
-
     ctx.WriteBuffer(DefaultDevice);
 
     IPC::ResponseBuilder rb{ctx, 3};
-
     rb.Push(RESULT_SUCCESS);
     rb.Push<u32>(1); // Amount of audio devices
 }
diff --git a/src/core/hle/service/audio/audrec_u.cpp b/src/core/hle/service/audio/audrec_u.cpp
index 34974afa9..1a5aed9ed 100644
--- a/src/core/hle/service/audio/audrec_u.cpp
+++ b/src/core/hle/service/audio/audrec_u.cpp
@@ -2,9 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/logging/log.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/audrec_u.h"
 
 namespace Service::Audio {
@@ -12,6 +9,7 @@ namespace Service::Audio {
 class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> {
 public:
     IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "GetFinalOutputRecorderState"},
             {1, nullptr, "StartFinalOutputRecorder"},
@@ -20,13 +18,15 @@ public:
             {4, nullptr, "RegisterBufferEvent"},
             {5, nullptr, "GetReleasedFinalOutputRecorderBuffer"},
             {6, nullptr, "ContainsFinalOutputRecorderBuffer"},
-            {7, nullptr, "Unknown"},
+            {7, nullptr, "GetFinalOutputRecorderBufferEndTime"},
             {8, nullptr, "AppendFinalOutputRecorderBufferAuto"},
             {9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"},
+            {10, nullptr, "FlushFinalOutputRecorderBuffers"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
-    ~IFinalOutputRecorder() = default;
 };
 
 AudRecU::AudRecU() : ServiceFramework("audrec:u") {
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 945259c7d..1dde6edb7 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
+#include "common/string_util.h"
 #include "core/core.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/hle_ipc.h"
@@ -17,6 +18,7 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/audio/audren_u.h"
+#include "core/hle/service/audio/errors.h"
 
 namespace Service::Audio {
 
@@ -37,15 +39,16 @@ public:
             {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
             {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
             {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
-            {11, nullptr, "ExecuteAudioRendererRendering"},
+            {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
         };
         // clang-format on
         RegisterHandlers(functions);
 
-        auto& kernel = Core::System::GetInstance().Kernel();
-        system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
-                                                              "IAudioRenderer:SystemEvent");
-        renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable);
+        auto& system = Core::System::GetInstance();
+        system_event = Kernel::WritableEvent::CreateEventPair(
+            system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
+        renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
+                                                              system_event.writable);
     }
 
 private:
@@ -137,6 +140,17 @@ private:
         rb.Push(rendering_time_limit_percent);
     }
 
+    void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_Audio, "called");
+
+        // This service command currently only reports an unsupported operation
+        // error code, or aborts. Given that, we just always return an error
+        // code in this case.
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ERR_NOT_SUPPORTED);
+    }
+
     Kernel::EventPair system_event;
     std::unique_ptr<AudioCore::AudioRenderer> renderer;
     u32 rendering_time_limit_percent = 100;
@@ -171,7 +185,6 @@ public:
 private:
     void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_Audio, "(STUBBED) called");
-        IPC::RequestParser rp{ctx};
 
         constexpr std::array<char, 15> audio_interface{{"AudioInterface"}};
         ctx.WriteBuffer(audio_interface);
@@ -182,13 +195,13 @@ private:
     }
 
     void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
-        LOG_WARNING(Service_Audio, "(STUBBED) called");
-
         IPC::RequestParser rp{ctx};
-        f32 volume = static_cast<f32>(rp.Pop<u32>());
+        const f32 volume = rp.Pop<f32>();
+
+        const auto device_name_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(device_name_buffer);
 
-        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
+        LOG_WARNING(Service_Audio, "(STUBBED) called. name={}, volume={}", name, volume);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
@@ -196,7 +209,6 @@ private:
 
     void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_Audio, "(STUBBED) called");
-        IPC::RequestParser rp{ctx};
 
         constexpr std::array<char, 12> audio_interface{{"AudioDevice"}};
         ctx.WriteBuffer(audio_interface);
@@ -229,14 +241,16 @@ private:
 }; // namespace Audio
 
 AudRenU::AudRenU() : ServiceFramework("audren:u") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
         {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
-        {2, &AudRenU::GetAudioDevice, "GetAudioDevice"},
-        {3, nullptr, "OpenAudioRendererAuto"},
-        {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo,
-         "GetAudioDeviceServiceWithRevisionInfo"},
+        {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
+        {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
+        {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
@@ -245,12 +259,7 @@ AudRenU::~AudRenU() = default;
 void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
-    IPC::RequestParser rp{ctx};
-    auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
-    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
-    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
+    OpenAudioRendererImpl(ctx);
 }
 
 void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -259,20 +268,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
     u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
-    buffer_sz += params.unknown_c * 1024;
-    buffer_sz += 0x940 * (params.unknown_c + 1);
+    buffer_sz += params.submix_count * 1024;
+    buffer_sz += 0x940 * (params.submix_count + 1);
     buffer_sz += 0x3F0 * params.voice_count;
-    buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
+    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
     buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
-    buffer_sz +=
-        Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
-                            (params.mix_buffer_count + 6),
-                        0x40);
+    buffer_sz += Common::AlignUp(
+        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
+            (params.mix_buffer_count + 6),
+        0x40);
 
     if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        u32 count = params.unknown_c + 1;
+        const u32 count = params.submix_count + 1;
         u64 node_count = Common::AlignUp(count, 0x40);
-        u64 node_state_buffer_sz =
+        const u64 node_state_buffer_sz =
             4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
         u64 edge_matrix_buffer_sz = 0;
         node_count = Common::AlignUp(count * count, 0x40);
@@ -286,19 +295,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
 
     buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
     if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        buffer_sz += 0xE0 * params.unknown_2c;
+        buffer_sz += 0xE0 * params.num_splitter_send_channels;
         buffer_sz += 0x20 * params.splitter_count;
-        buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
+        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
     }
     buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
     u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
                     ((params.voice_count * 256) | 0x40);
 
-    if (params.unknown_1c >= 1) {
+    if (params.performance_frame_count >= 1) {
         output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
                                       16 * params.voice_count + 16) +
                                      0x658) *
-                                            (params.unknown_1c + 1) +
+                                            (params.performance_frame_count + 1) +
                                         0xc0,
                                     0x40) +
                     output_sz;
@@ -313,7 +322,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
 }
 
-void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
+void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -322,6 +331,12 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
     rb.PushIpcInterface<Audio::IAudioDevice>();
 }
 
+void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_Audio, "called");
+
+    OpenAudioRendererImpl(ctx);
+}
+
 void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_Audio, "(STUBBED) called");
 
@@ -332,6 +347,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
                                                 // based on the current revision
 }
 
+void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+
+    rb.Push(RESULT_SUCCESS);
+    rb.PushIpcInterface<IAudioRenderer>(params);
+}
+
 bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
     u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
     switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index c6bc3a90a..e55d25973 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -20,9 +20,12 @@ public:
 private:
     void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
     void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
-    void GetAudioDevice(Kernel::HLERequestContext& ctx);
+    void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
+    void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
     void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
 
+    void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
+
     enum class AudioFeatures : u32 {
         Splitter,
     };
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h
new file mode 100644
index 000000000..6f8c09bcf
--- /dev/null
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::Audio {
+
+constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
+constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
+constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
+
+} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index a850cadc8..cb4a1160d 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -5,110 +5,117 @@
 #include <chrono>
 #include <cstring>
 #include <memory>
-#include <optional>
 #include <vector>
 
 #include <opus.h>
+#include <opus_multistream.h>
 
-#include "common/common_funcs.h"
+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/hwopus.h"
 
 namespace Service::Audio {
-
+namespace {
 struct OpusDeleter {
-    void operator()(void* ptr) const {
-        operator delete(ptr);
+    void operator()(OpusMSDecoder* ptr) const {
+        opus_multistream_decoder_destroy(ptr);
     }
 };
 
-class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
+using OpusDecoderPtr = std::unique_ptr<OpusMSDecoder, OpusDeleter>;
+
+struct OpusPacketHeader {
+    // Size in bytes of the Opus packet data that follows this header (stored big-endian).
+    u32_be size;
+    // Indicates the final range of the codec's entropy coder.
+    u32_be final_range;
+};
+static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusPacketHeader is an invalid size");
+
+class OpusDecoderState {
 public:
-    IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
-                                u32 channel_count)
-        : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
-          sample_rate(sample_rate), channel_count(channel_count) {
-        static const FunctionInfo functions[] = {
-            {0, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
-            {1, nullptr, "SetContext"},
-            {2, nullptr, "DecodeInterleavedForMultiStream"},
-            {3, nullptr, "SetContextForMultiStream"},
-            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
-             "DecodeInterleavedWithPerformance"},
-            {5, nullptr, "Unknown5"},
-            {6, nullptr, "Unknown6"},
-            {7, nullptr, "Unknown7"},
-        };
-        RegisterHandlers(functions);
-    }
+    /// Describes extra behavior that may be asked of the decoding context.
+    enum class ExtraBehavior {
+        /// No extra behavior.
+        None,
 
-private:
-    void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
+        /// Resets the decoder context back to a freshly initialized state.
+        ResetContext,
+    };
 
-        u32 consumed = 0;
-        u32 sample_count = 0;
-        std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
-        if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples)) {
-            LOG_ERROR(Audio, "Failed to decode opus data");
-            IPC::ResponseBuilder rb{ctx, 2};
-            // TODO(ogniK): Use correct error code
-            rb.Push(ResultCode(-1));
-            return;
+    enum class PerfTime {
+        Disabled,
+        Enabled,
+    };
+
+    explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
+        : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
+
+    // Decodes interleaved Opus packets. Optionally allows reporting time taken to
+    // perform the decoding, as well as any relevant extra behavior.
+    void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
+                           ExtraBehavior extra_behavior) {
+        if (perf_time == PerfTime::Disabled) {
+            DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
+        } else {
+            u64 performance = 0;
+            DecodeInterleavedHelper(ctx, &performance, extra_behavior);
         }
-        IPC::ResponseBuilder rb{ctx, 4};
-        rb.Push(RESULT_SUCCESS);
-        rb.Push<u32>(consumed);
-        rb.Push<u32>(sample_count);
-        ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
     }
 
-    void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
-
+private:
+    void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
+                                 ExtraBehavior extra_behavior) {
         u32 consumed = 0;
         u32 sample_count = 0;
-        u64 performance = 0;
         std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
-        if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
-                                       performance)) {
+
+        if (extra_behavior == ExtraBehavior::ResetContext) {
+            ResetDecoderContext();
+        }
+
+        if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
             LOG_ERROR(Audio, "Failed to decode opus data");
             IPC::ResponseBuilder rb{ctx, 2};
             // TODO(ogniK): Use correct error code
             rb.Push(ResultCode(-1));
             return;
         }
-        IPC::ResponseBuilder rb{ctx, 6};
+
+        const u32 param_size = performance != nullptr ? 6 : 4;
+        IPC::ResponseBuilder rb{ctx, param_size};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u32>(consumed);
         rb.Push<u32>(sample_count);
-        rb.Push<u64>(performance);
+        if (performance) {
+            rb.Push<u64>(*performance);
+        }
         ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
     }
 
-    bool Decoder_DecodeInterleaved(
-        u32& consumed, u32& sample_count, const std::vector<u8>& input,
-        std::vector<opus_int16>& output,
-        std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
+    bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
+                        std::vector<opus_int16>& output, u64* out_performance_time) const {
         const auto start_time = std::chrono::high_resolution_clock::now();
-        std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
-        if (sizeof(OpusHeader) > input.size()) {
+        const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
+        if (sizeof(OpusPacketHeader) > input.size()) {
             LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
-                      sizeof(OpusHeader), input.size());
+                      sizeof(OpusPacketHeader), input.size());
             return false;
         }
-        OpusHeader hdr{};
-        std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
-        if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
+
+        OpusPacketHeader hdr{};
+        std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
+        if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
             LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
-                      sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
+                      sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
             return false;
         }
-        auto frame = input.data() + sizeof(OpusHeader);
-        auto decoded_sample_count = opus_packet_get_nb_samples(
-            frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
+
+        const auto frame = input.data() + sizeof(OpusPacketHeader);
+        const auto decoded_sample_count = opus_packet_get_nb_samples(
+            frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
             static_cast<opus_int32>(sample_rate));
         if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
             LOG_ERROR(
@@ -117,46 +124,117 @@ private:
                 decoded_sample_count * channel_count * sizeof(u16), raw_output_sz);
             return false;
         }
+
         const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
-        auto out_sample_count =
-            opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
+        const auto out_sample_count =
+            opus_multistream_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
         if (out_sample_count < 0) {
             LOG_ERROR(Audio,
                       "Incorrect sample count received from opus_decode, "
                       "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
-                      out_sample_count, frame_size, static_cast<u32>(hdr.sz));
+                      out_sample_count, frame_size, static_cast<u32>(hdr.size));
             return false;
         }
+
         const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
         sample_count = out_sample_count;
-        consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
-        if (performance_time.has_value()) {
-            performance_time->get() =
+        consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
+        if (out_performance_time != nullptr) {
+            *out_performance_time =
                 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
         }
+
         return true;
     }
 
-    struct OpusHeader {
-        u32_be sz; // Needs to be BE for some odd reason
-        INSERT_PADDING_WORDS(1);
-    };
-    static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
+    void ResetDecoderContext() {
+        ASSERT(decoder != nullptr);
+
+        opus_multistream_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
+    }
 
-    std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
+    OpusDecoderPtr decoder;
     u32 sample_rate;
     u32 channel_count;
 };
 
-static std::size_t WorkerBufferSize(u32 channel_count) {
+class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
+public:
+    explicit IHardwareOpusDecoderManager(OpusDecoderState decoder_state)
+        : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
+            {1, nullptr, "SetContext"},
+            {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
+            {3, nullptr, "SetContextForMultiStream"},
+            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
+            {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
+            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+            {7, nullptr, "DecodeInterleavedForMultiStream"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+
+private:
+    void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Disabled,
+                                        OpusDecoderState::ExtraBehavior::None);
+    }
+
+    void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled,
+                                        OpusDecoderState::ExtraBehavior::None);
+    }
+
+    void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        IPC::RequestParser rp{ctx};
+        const auto extra_behavior = rp.Pop<bool>() ? OpusDecoderState::ExtraBehavior::ResetContext
+                                                   : OpusDecoderState::ExtraBehavior::None;
+
+        decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled, extra_behavior);
+    }
+
+    OpusDecoderState decoder_state;
+};
+
+std::size_t WorkerBufferSize(u32 channel_count) {
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
-    return opus_decoder_get_size(static_cast<int>(channel_count));
+    constexpr int num_streams = 1;
+    const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+    return opus_multistream_decoder_get_size(num_streams, num_stereo_streams);
 }
 
+// Creates the mapping table that maps the input channels to the particular
+// output channels. In the stereo case, we map the left and right input channels
+// to the left and right output channels respectively.
+//
+// However, in the monophonic case, we only map the one available channel
+// to the sole output channel. We specify 255 for the would-be right channel
+// as this is a special value defined by Opus to indicate to the decoder to
+// ignore that channel.
+std::array<u8, 2> CreateMappingTable(u32 channel_count) {
+    if (channel_count == 2) {
+        return {{0, 1}};
+    }
+
+    return {{0, 255}};
+}
+} // Anonymous namespace
+
 void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     const auto sample_rate = rp.Pop<u32>();
     const auto channel_count = rp.Pop<u32>();
+
     LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count);
 
     ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
@@ -174,9 +252,10 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
 
 void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
-    auto sample_rate = rp.Pop<u32>();
-    auto channel_count = rp.Pop<u32>();
-    auto buffer_sz = rp.Pop<u32>();
+    const auto sample_rate = rp.Pop<u32>();
+    const auto channel_count = rp.Pop<u32>();
+    const auto buffer_sz = rp.Pop<u32>();
+
     LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate,
               channel_count, buffer_sz);
 
@@ -185,12 +264,18 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
                "Invalid sample rate");
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
 
-    std::size_t worker_sz = WorkerBufferSize(channel_count);
+    const std::size_t worker_sz = WorkerBufferSize(channel_count);
     ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
-    std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
-        static_cast<OpusDecoder*>(operator new(worker_sz))};
-    if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
-        LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
+
+    const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+    const auto mapping_table = CreateMappingTable(channel_count);
+
+    int error = 0;
+    OpusDecoderPtr decoder{
+        opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
+                                        num_stereo_streams, mapping_table.data(), &error)};
+    if (error != OPUS_OK || decoder == nullptr) {
+        LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
         IPC::ResponseBuilder rb{ctx, 2};
         // TODO(ogniK): Use correct error code
         rb.Push(ResultCode(-1));
@@ -199,8 +284,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
     rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate,
-                                                     channel_count);
+    rb.PushIpcInterface<IHardwareOpusDecoderManager>(
+        OpusDecoderState{std::move(decoder), sample_rate, channel_count});
 }
 
 HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 5704ca0ab..59ef603e1 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -19,16 +19,16 @@ public:
     explicit Bt() : ServiceFramework{"bt"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "Unknown0"},
-            {1, nullptr, "Unknown1"},
-            {2, nullptr, "Unknown2"},
-            {3, nullptr, "Unknown3"},
-            {4, nullptr, "Unknown4"},
-            {5, nullptr, "Unknown5"},
-            {6, nullptr, "Unknown6"},
-            {7, nullptr, "Unknown7"},
-            {8, nullptr, "Unknown8"},
-            {9, &Bt::RegisterEvent, "RegisterEvent"},
+            {0, nullptr, "LeClientReadCharacteristic"},
+            {1, nullptr, "LeClientReadDescriptor"},
+            {2, nullptr, "LeClientWriteCharacteristic"},
+            {3, nullptr, "LeClientWriteDescriptor"},
+            {4, nullptr, "LeClientRegisterNotification"},
+            {5, nullptr, "LeClientDeregisterNotification"},
+            {6, nullptr, "SetLeResponse"},
+            {7, nullptr, "LeSendIndication"},
+            {8, nullptr, "GetLeEventInfo"},
+            {9, &Bt::RegisterBleEvent, "RegisterBleEvent"},
         };
         // clang-format on
         RegisterHandlers(functions);
@@ -39,7 +39,7 @@ public:
     }
 
 private:
-    void RegisterEvent(Kernel::HLERequestContext& ctx) {
+    void RegisterBleEvent(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_BTM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -55,11 +55,11 @@ public:
     explicit BtDrv() : ServiceFramework{"btdrv"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "Unknown"},
-            {1, nullptr, "Init"},
-            {2, nullptr, "Enable"},
-            {3, nullptr, "Disable"},
-            {4, nullptr, "CleanupAndShutdown"},
+            {0, nullptr, "InitializeBluetoothDriver"},
+            {1, nullptr, "InitializeBluetooth"},
+            {2, nullptr, "EnableBluetooth"},
+            {3, nullptr, "DisableBluetooth"},
+            {4, nullptr, "CleanupBluetooth"},
             {5, nullptr, "GetAdapterProperties"},
             {6, nullptr, "GetAdapterProperty"},
             {7, nullptr, "SetAdapterProperty"},
@@ -70,36 +70,91 @@ public:
             {12, nullptr, "CancelBond"},
             {13, nullptr, "PinReply"},
             {14, nullptr, "SspReply"},
-            {15, nullptr, "Unknown2"},
-            {16, nullptr, "InitInterfaces"},
-            {17, nullptr, "HidHostInterface_Connect"},
-            {18, nullptr, "HidHostInterface_Disconnect"},
-            {19, nullptr, "HidHostInterface_SendData"},
-            {20, nullptr, "HidHostInterface_SendData2"},
-            {21, nullptr, "HidHostInterface_SetReport"},
-            {22, nullptr, "HidHostInterface_GetReport"},
-            {23, nullptr, "HidHostInterface_WakeController"},
-            {24, nullptr, "HidHostInterface_AddPairedDevice"},
-            {25, nullptr, "HidHostInterface_GetPairedDevice"},
-            {26, nullptr, "HidHostInterface_CleanupAndShutdown"},
-            {27, nullptr, "Unknown3"},
-            {28, nullptr, "ExtInterface_SetTSI"},
-            {29, nullptr, "ExtInterface_SetBurstMode"},
-            {30, nullptr, "ExtInterface_SetZeroRetran"},
-            {31, nullptr, "ExtInterface_SetMcMode"},
-            {32, nullptr, "ExtInterface_StartLlrMode"},
-            {33, nullptr, "ExtInterface_ExitLlrMode"},
-            {34, nullptr, "ExtInterface_SetRadio"},
-            {35, nullptr, "ExtInterface_SetVisibility"},
-            {36, nullptr, "Unknown4"},
-            {37, nullptr, "Unknown5"},
-            {38, nullptr, "HidHostInterface_GetLatestPlr"},
-            {39, nullptr, "ExtInterface_GetPendingConnections"},
-            {40, nullptr, "HidHostInterface_GetChannelMap"},
-            {41, nullptr, "SetIsBluetoothBoostEnabled"},
-            {42, nullptr, "GetIsBluetoothBoostEnabled"},
-            {43, nullptr, "SetIsBluetoothAfhEnabled"},
-            {44, nullptr, "GetIsBluetoothAfhEnabled"},
+            {15, nullptr, "GetEventInfo"},
+            {16, nullptr, "InitializeHid"},
+            {17, nullptr, "HidConnect"},
+            {18, nullptr, "HidDisconnect"},
+            {19, nullptr, "HidSendData"},
+            {20, nullptr, "HidSendData2"},
+            {21, nullptr, "HidSetReport"},
+            {22, nullptr, "HidGetReport"},
+            {23, nullptr, "HidWakeController"},
+            {24, nullptr, "HidAddPairedDevice"},
+            {25, nullptr, "HidGetPairedDevice"},
+            {26, nullptr, "CleanupHid"},
+            {27, nullptr, "HidGetEventInfo"},
+            {28, nullptr, "ExtSetTsi"},
+            {29, nullptr, "ExtSetBurstMode"},
+            {30, nullptr, "ExtSetZeroRetran"},
+            {31, nullptr, "ExtSetMcMode"},
+            {32, nullptr, "ExtStartLlrMode"},
+            {33, nullptr, "ExtExitLlrMode"},
+            {34, nullptr, "ExtSetRadio"},
+            {35, nullptr, "ExtSetVisibility"},
+            {36, nullptr, "ExtSetTbfcScan"},
+            {37, nullptr, "RegisterHidReportEvent"},
+            {38, nullptr, "HidGetReportEventInfo"},
+            {39, nullptr, "GetLatestPlr"},
+            {40, nullptr, "ExtGetPendingConnections"},
+            {41, nullptr, "GetChannelMap"},
+            {42, nullptr, "EnableBluetoothBoostSetting"},
+            {43, nullptr, "IsBluetoothBoostSettingEnabled"},
+            {44, nullptr, "EnableBluetoothAfhSetting"},
+            {45, nullptr, "IsBluetoothAfhSettingEnabled"},
+            {46, nullptr, "InitializeBluetoothLe"},
+            {47, nullptr, "EnableBluetoothLe"},
+            {48, nullptr, "DisableBluetoothLe"},
+            {49, nullptr, "CleanupBluetoothLe"},
+            {50, nullptr, "SetLeVisibility"},
+            {51, nullptr, "SetLeConnectionParameter"},
+            {52, nullptr, "SetLeDefaultConnectionParameter"},
+            {53, nullptr, "SetLeAdvertiseData"},
+            {54, nullptr, "SetLeAdvertiseParameter"},
+            {55, nullptr, "StartLeScan"},
+            {56, nullptr, "StopLeScan"},
+            {57, nullptr, "AddLeScanFilterCondition"},
+            {58, nullptr, "DeleteLeScanFilterCondition"},
+            {59, nullptr, "DeleteLeScanFilter"},
+            {60, nullptr, "ClearLeScanFilters"},
+            {61, nullptr, "EnableLeScanFilter"},
+            {62, nullptr, "RegisterLeClient"},
+            {63, nullptr, "UnregisterLeClient"},
+            {64, nullptr, "UnregisterLeClientAll"},
+            {65, nullptr, "LeClientConnect"},
+            {66, nullptr, "LeClientCancelConnection"},
+            {67, nullptr, "LeClientDisconnect"},
+            {68, nullptr, "LeClientGetAttributes"},
+            {69, nullptr, "LeClientDiscoverService"},
+            {70, nullptr, "LeClientConfigureMtu"},
+            {71, nullptr, "RegisterLeServer"},
+            {72, nullptr, "UnregisterLeServer"},
+            {73, nullptr, "LeServerConnect"},
+            {74, nullptr, "LeServerDisconnect"},
+            {75, nullptr, "CreateLeService"},
+            {76, nullptr, "StartLeService"},
+            {77, nullptr, "AddLeCharacteristic"},
+            {78, nullptr, "AddLeDescriptor"},
+            {79, nullptr, "GetLeCoreEventInfo"},
+            {80, nullptr, "LeGetFirstCharacteristic"},
+            {81, nullptr, "LeGetNextCharacteristic"},
+            {82, nullptr, "LeGetFirstDescriptor"},
+            {83, nullptr, "LeGetNextDescriptor"},
+            {84, nullptr, "RegisterLeCoreDataPath"},
+            {85, nullptr, "UnregisterLeCoreDataPath"},
+            {86, nullptr, "RegisterLeHidDataPath"},
+            {87, nullptr, "UnregisterLeHidDataPath"},
+            {88, nullptr, "RegisterLeDataPath"},
+            {89, nullptr, "UnregisterLeDataPath"},
+            {90, nullptr, "LeClientReadCharacteristic"},
+            {91, nullptr, "LeClientReadDescriptor"},
+            {92, nullptr, "LeClientWriteCharacteristic"},
+            {93, nullptr, "LeClientWriteDescriptor"},
+            {94, nullptr, "LeClientRegisterNotification"},
+            {95, nullptr, "LeClientDeregisterNotification"},
+            {96, nullptr, "GetLeHidEventInfo"},
+            {97, nullptr, "RegisterBleHidEvent"},
+            {98, nullptr, "SetLeScanParameter"},
+            {256, nullptr, "GetIsManufacturingMode"}
         };
         // clang-format on
 
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp
index ef7398a23..4f15c3f19 100644
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -20,38 +20,38 @@ public:
     explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, &IBtmUserCore::GetScanEvent, "GetScanEvent"},
-            {1, nullptr, "Unknown1"},
-            {2, nullptr, "Unknown2"},
-            {3, nullptr, "Unknown3"},
-            {4, nullptr, "Unknown4"},
-            {5, nullptr, "Unknown5"},
-            {6, nullptr, "Unknown6"},
-            {7, nullptr, "Unknown7"},
-            {8, nullptr, "Unknown8"},
-            {9, nullptr, "Unknown9"},
-            {10, nullptr, "Unknown10"},
-            {17, &IBtmUserCore::GetConnectionEvent, "GetConnectionEvent"},
-            {18, nullptr, "Unknown18"},
-            {19, nullptr, "Unknown19"},
-            {20, nullptr, "Unknown20"},
-            {21, nullptr, "Unknown21"},
-            {22, nullptr, "Unknown22"},
-            {23, nullptr, "Unknown23"},
-            {24, nullptr, "Unknown24"},
-            {25, nullptr, "Unknown25"},
-            {26, &IBtmUserCore::GetDiscoveryEvent, "AcquireBleServiceDiscoveryEventImpl"},
-            {27, nullptr, "Unknown27"},
-            {28, nullptr, "Unknown28"},
-            {29, nullptr, "Unknown29"},
-            {30, nullptr, "Unknown30"},
-            {31, nullptr, "Unknown31"},
-            {32, nullptr, "Unknown32"},
-            {33, &IBtmUserCore::GetConfigEvent, "GetConfigEvent"},
-            {34, nullptr, "Unknown34"},
-            {35, nullptr, "Unknown35"},
-            {36, nullptr, "Unknown36"},
-            {37, nullptr, "Unknown37"},
+            {0, &IBtmUserCore::AcquireBleScanEvent, "AcquireBleScanEvent"},
+            {1, nullptr, "GetBleScanFilterParameter"},
+            {2, nullptr, "GetBleScanFilterParameter2"},
+            {3, nullptr, "StartBleScanForGeneral"},
+            {4, nullptr, "StopBleScanForGeneral"},
+            {5, nullptr, "GetBleScanResultsForGeneral"},
+            {6, nullptr, "StartBleScanForPaired"},
+            {7, nullptr, "StopBleScanForPaired"},
+            {8, nullptr, "StartBleScanForSmartDevice"},
+            {9, nullptr, "StopBleScanForSmartDevice"},
+            {10, nullptr, "GetBleScanResultsForSmartDevice"},
+            {17, &IBtmUserCore::AcquireBleConnectionEvent, "AcquireBleConnectionEvent"},
+            {18, nullptr, "BleConnect"},
+            {19, nullptr, "BleDisconnect"},
+            {20, nullptr, "BleGetConnectionState"},
+            {21, nullptr, "AcquireBlePairingEvent"},
+            {22, nullptr, "BlePairDevice"},
+            {23, nullptr, "BleUnPairDevice"},
+            {24, nullptr, "BleUnPairDevice2"},
+            {25, nullptr, "BleGetPairedDevices"},
+            {26, &IBtmUserCore::AcquireBleServiceDiscoveryEvent, "AcquireBleServiceDiscoveryEvent"},
+            {27, nullptr, "GetGattServices"},
+            {28, nullptr, "GetGattService"},
+            {29, nullptr, "GetGattIncludedServices"},
+            {30, nullptr, "GetBelongingGattService"},
+            {31, nullptr, "GetGattCharacteristics"},
+            {32, nullptr, "GetGattDescriptors"},
+            {33, &IBtmUserCore::AcquireBleMtuConfigEvent, "AcquireBleMtuConfigEvent"},
+            {34, nullptr, "ConfigureBleMtu"},
+            {35, nullptr, "GetBleMtu"},
+            {36, nullptr, "RegisterBleGattDataPath"},
+            {37, nullptr, "UnregisterBleGattDataPath"},
         };
         // clang-format on
         RegisterHandlers(functions);
@@ -68,7 +68,7 @@ public:
     }
 
 private:
-    void GetScanEvent(Kernel::HLERequestContext& ctx) {
+    void AcquireBleScanEvent(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_BTM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -76,7 +76,7 @@ private:
         rb.PushCopyObjects(scan_event.readable);
     }
 
-    void GetConnectionEvent(Kernel::HLERequestContext& ctx) {
+    void AcquireBleConnectionEvent(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_BTM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -84,7 +84,7 @@ private:
         rb.PushCopyObjects(connection_event.readable);
     }
 
-    void GetDiscoveryEvent(Kernel::HLERequestContext& ctx) {
+    void AcquireBleServiceDiscoveryEvent(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_BTM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -92,7 +92,7 @@ private:
         rb.PushCopyObjects(service_discovery.readable);
     }
 
-    void GetConfigEvent(Kernel::HLERequestContext& ctx) {
+    void AcquireBleMtuConfigEvent(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_BTM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -111,14 +111,14 @@ public:
     explicit BTM_USR() : ServiceFramework{"btm:u"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, &BTM_USR::GetCoreImpl, "GetCoreImpl"},
+            {0, &BTM_USR::GetCore, "GetCore"},
         };
         // clang-format on
         RegisterHandlers(functions);
     }
 
 private:
-    void GetCoreImpl(Kernel::HLERequestContext& ctx) {
+    void GetCore(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_BTM, "called");
 
         IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -134,26 +134,64 @@ public:
         static const FunctionInfo functions[] = {
             {0, nullptr, "Unknown1"},
             {1, nullptr, "Unknown2"},
-            {2, nullptr, "RegisterSystemEventForConnectedDeviceConditionImpl"},
+            {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"},
             {3, nullptr, "Unknown3"},
             {4, nullptr, "Unknown4"},
             {5, nullptr, "Unknown5"},
             {6, nullptr, "Unknown6"},
             {7, nullptr, "Unknown7"},
-            {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfoImpl"},
+            {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"},
             {9, nullptr, "Unknown8"},
             {10, nullptr, "Unknown9"},
             {11, nullptr, "Unknown10"},
             {12, nullptr, "Unknown11"},
             {13, nullptr, "Unknown12"},
-            {14, nullptr, "EnableRadioImpl"},
-            {15, nullptr, "DisableRadioImpl"},
+            {14, nullptr, "EnableRadio"},
+            {15, nullptr, "DisableRadio"},
             {16, nullptr, "Unknown13"},
             {17, nullptr, "Unknown14"},
             {18, nullptr, "Unknown15"},
             {19, nullptr, "Unknown16"},
             {20, nullptr, "Unknown17"},
             {21, nullptr, "Unknown18"},
+            {22, nullptr, "Unknown19"},
+            {23, nullptr, "Unknown20"},
+            {24, nullptr, "Unknown21"},
+            {25, nullptr, "Unknown22"},
+            {26, nullptr, "Unknown23"},
+            {27, nullptr, "Unknown24"},
+            {28, nullptr, "Unknown25"},
+            {29, nullptr, "Unknown26"},
+            {30, nullptr, "Unknown27"},
+            {31, nullptr, "Unknown28"},
+            {32, nullptr, "Unknown29"},
+            {33, nullptr, "Unknown30"},
+            {34, nullptr, "Unknown31"},
+            {35, nullptr, "Unknown32"},
+            {36, nullptr, "Unknown33"},
+            {37, nullptr, "Unknown34"},
+            {38, nullptr, "Unknown35"},
+            {39, nullptr, "Unknown36"},
+            {40, nullptr, "Unknown37"},
+            {41, nullptr, "Unknown38"},
+            {42, nullptr, "Unknown39"},
+            {43, nullptr, "Unknown40"},
+            {44, nullptr, "Unknown41"},
+            {45, nullptr, "Unknown42"},
+            {46, nullptr, "Unknown43"},
+            {47, nullptr, "Unknown44"},
+            {48, nullptr, "Unknown45"},
+            {49, nullptr, "Unknown46"},
+            {50, nullptr, "Unknown47"},
+            {51, nullptr, "Unknown48"},
+            {52, nullptr, "Unknown49"},
+            {53, nullptr, "Unknown50"},
+            {54, nullptr, "Unknown51"},
+            {55, nullptr, "Unknown52"},
+            {56, nullptr, "Unknown53"},
+            {57, nullptr, "Unknown54"},
+            {58, nullptr, "Unknown55"},
+            {59, nullptr, "Unknown56"},
         };
         // clang-format on
 
@@ -166,7 +204,7 @@ public:
     explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "RegisterSystemEventForDiscoveryImpl"},
+            {0, nullptr, "RegisterSystemEventForDiscovery"},
             {1, nullptr, "Unknown1"},
             {2, nullptr, "Unknown2"},
             {3, nullptr, "Unknown3"},
@@ -175,6 +213,10 @@ public:
             {6, nullptr, "Unknown6"},
             {7, nullptr, "Unknown7"},
             {8, nullptr, "Unknown8"},
+            {9, nullptr, "Unknown9"},
+            {10, nullptr, "Unknown10"},
+            {11, nullptr, "Unknown11"},
+            {12, nullptr, "Unknown12"},
         };
         // clang-format on
 
@@ -187,16 +229,16 @@ public:
     explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "StartGamepadPairingImpl"},
-            {1, nullptr, "CancelGamepadPairingImpl"},
-            {2, nullptr, "ClearGamepadPairingDatabaseImpl"},
-            {3, nullptr, "GetPairedGamepadCountImpl"},
-            {4, nullptr, "EnableRadioImpl"},
-            {5, nullptr, "DisableRadioImpl"},
-            {6, nullptr, "GetRadioOnOffImpl"},
-            {7, nullptr, "AcquireRadioEventImpl"},
-            {8, nullptr, "AcquireGamepadPairingEventImpl"},
-            {9, nullptr, "IsGamepadPairingStartedImpl"},
+            {0, nullptr, "StartGamepadPairing"},
+            {1, nullptr, "CancelGamepadPairing"},
+            {2, nullptr, "ClearGamepadPairingDatabase"},
+            {3, nullptr, "GetPairedGamepadCount"},
+            {4, nullptr, "EnableRadio"},
+            {5, nullptr, "DisableRadio"},
+            {6, nullptr, "GetRadioOnOff"},
+            {7, nullptr, "AcquireRadioEvent"},
+            {8, nullptr, "AcquireGamepadPairingEvent"},
+            {9, nullptr, "IsGamepadPairingStarted"},
         };
         // clang-format on
 
@@ -209,7 +251,7 @@ public:
     explicit BTM_SYS() : ServiceFramework{"btm:sys"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, &BTM_SYS::GetCoreImpl, "GetCoreImpl"},
+            {0, &BTM_SYS::GetCore, "GetCore"},
         };
         // clang-format on
 
@@ -217,7 +259,7 @@ public:
     }
 
 private:
-    void GetCoreImpl(Kernel::HLERequestContext& ctx) {
+    void GetCore(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_BTM, "called");
 
         IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 770590d0b..2c229bcad 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -25,21 +25,34 @@ Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
 Module::Interface::~Interface() = default;
 
 struct FatalInfo {
-    std::array<u64_le, 31> registers{}; // TODO(ogniK): See if this actually is registers or
-                                        // not(find a game which has non zero valeus)
-    u64_le unk0{};
-    u64_le unk1{};
-    u64_le unk2{};
-    u64_le unk3{};
-    u64_le unk4{};
-    u64_le unk5{};
-    u64_le unk6{};
+    enum class Architecture : s32 {
+        AArch64,
+        AArch32,
+    };
+
+    const char* ArchAsString() const {
+        return arch == Architecture::AArch64 ? "AArch64" : "AArch32";
+    }
+
+    std::array<u64_le, 31> registers{};
+    u64_le sp{};
+    u64_le pc{};
+    u64_le pstate{};
+    u64_le afsr0{};
+    u64_le afsr1{};
+    u64_le esr{};
+    u64_le far{};
 
     std::array<u64_le, 32> backtrace{};
-    u64_le unk7{};
-    u64_le unk8{};
+    u64_le program_entry_point{};
+
+    // Bit flags that indicate which registers have been set with values
+    // for this context. The service itself uses these to determine which
+    // registers to specifically print out.
+    u64_le set_flags{};
+
     u32_le backtrace_size{};
-    u32_le unk9{};
+    Architecture arch{};
     u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding?
 };
 static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size");
@@ -52,36 +65,36 @@ enum class FatalType : u32 {
 
 static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
     const auto title_id = Core::CurrentProcess()->GetTitleID();
-    std::string crash_report =
-        fmt::format("Yuzu {}-{} crash report\n"
-                    "Title ID:                        {:016x}\n"
-                    "Result:                          0x{:X} ({:04}-{:04d})\n"
-                    "\n",
-                    Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
-                    2000 + static_cast<u32>(error_code.module.Value()),
-                    static_cast<u32>(error_code.description.Value()), info.unk8, info.unk7);
+    std::string crash_report = fmt::format(
+        "Yuzu {}-{} crash report\n"
+        "Title ID:                        {:016x}\n"
+        "Result:                          0x{:X} ({:04}-{:04d})\n"
+        "Set flags:                       0x{:016X}\n"
+        "Program entry point:             0x{:016X}\n"
+        "\n",
+        Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
+        2000 + static_cast<u32>(error_code.module.Value()),
+        static_cast<u32>(error_code.description.Value()), info.set_flags, info.program_entry_point);
     if (info.backtrace_size != 0x0) {
         crash_report += "Registers:\n";
-        // TODO(ogniK): This is just a guess, find a game which actually has non zero values
         for (size_t i = 0; i < info.registers.size(); i++) {
             crash_report +=
                 fmt::format("    X[{:02d}]:                       {:016x}\n", i, info.registers[i]);
         }
-        crash_report += fmt::format("    Unknown 0:                   {:016x}\n", info.unk0);
-        crash_report += fmt::format("    Unknown 1:                   {:016x}\n", info.unk1);
-        crash_report += fmt::format("    Unknown 2:                   {:016x}\n", info.unk2);
-        crash_report += fmt::format("    Unknown 3:                   {:016x}\n", info.unk3);
-        crash_report += fmt::format("    Unknown 4:                   {:016x}\n", info.unk4);
-        crash_report += fmt::format("    Unknown 5:                   {:016x}\n", info.unk5);
-        crash_report += fmt::format("    Unknown 6:                   {:016x}\n", info.unk6);
+        crash_report += fmt::format("    SP:                          {:016x}\n", info.sp);
+        crash_report += fmt::format("    PC:                          {:016x}\n", info.pc);
+        crash_report += fmt::format("    PSTATE:                      {:016x}\n", info.pstate);
+        crash_report += fmt::format("    AFSR0:                       {:016x}\n", info.afsr0);
+        crash_report += fmt::format("    AFSR1:                       {:016x}\n", info.afsr1);
+        crash_report += fmt::format("    ESR:                         {:016x}\n", info.esr);
+        crash_report += fmt::format("    FAR:                         {:016x}\n", info.far);
         crash_report += "\nBacktrace:\n";
         for (size_t i = 0; i < info.backtrace_size; i++) {
             crash_report +=
                 fmt::format("    Backtrace[{:02d}]:               {:016x}\n", i, info.backtrace[i]);
         }
-        crash_report += fmt::format("\nUnknown 7:                       0x{:016x}\n", info.unk7);
-        crash_report += fmt::format("Unknown 8:                       0x{:016x}\n", info.unk8);
-        crash_report += fmt::format("Unknown 9:                       0x{:016x}\n", info.unk9);
+
+        crash_report += fmt::format("Architecture:                    {}\n", info.ArchAsString());
         crash_report += fmt::format("Unknown 10:                      0x{:016x}\n", info.unk10);
     }
 
@@ -125,13 +138,13 @@ static void ThrowFatalError(ResultCode error_code, FatalType fatal_type, const F
     case FatalType::ErrorReport:
         GenerateErrorReport(error_code, info);
         break;
-    };
+    }
 }
 
 void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp{ctx};
-    auto error_code = rp.Pop<ResultCode>();
+    const auto error_code = rp.Pop<ResultCode>();
 
     ThrowFatalError(error_code, FatalType::ErrorScreen, {});
     IPC::ResponseBuilder rb{ctx, 2};
@@ -141,8 +154,8 @@ void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
 void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    auto error_code = rp.Pop<ResultCode>();
-    auto fatal_type = rp.PopEnum<FatalType>();
+    const auto error_code = rp.Pop<ResultCode>();
+    const auto fatal_type = rp.PopEnum<FatalType>();
 
     ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy
     IPC::ResponseBuilder rb{ctx, 2};
@@ -152,9 +165,9 @@ void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
 void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    auto error_code = rp.Pop<ResultCode>();
-    auto fatal_type = rp.PopEnum<FatalType>();
-    auto fatal_info = ctx.ReadBuffer();
+    const auto error_code = rp.Pop<ResultCode>();
+    const auto fatal_type = rp.PopEnum<FatalType>();
+    const auto fatal_info = ctx.ReadBuffer();
     FatalInfo info{};
 
     ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!");
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index c6da2df43..1ebfeb4bf 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -197,13 +197,16 @@ ResultCode VfsDirectoryServiceWrapper::RenameDirectory(const std::string& src_pa
 
 ResultVal<FileSys::VirtualFile> VfsDirectoryServiceWrapper::OpenFile(const std::string& path_,
                                                                      FileSys::Mode mode) const {
-    std::string path(FileUtil::SanitizePath(path_));
-    auto npath = path;
-    while (npath.size() > 0 && (npath[0] == '/' || npath[0] == '\\'))
-        npath = npath.substr(1);
+    const std::string path(FileUtil::SanitizePath(path_));
+    std::string_view npath = path;
+    while (!npath.empty() && (npath[0] == '/' || npath[0] == '\\')) {
+        npath.remove_prefix(1);
+    }
+
     auto file = backing->GetFileRelative(npath);
-    if (file == nullptr)
+    if (file == nullptr) {
         return FileSys::ERROR_PATH_NOT_FOUND;
+    }
 
     if (mode == FileSys::Mode::Append) {
         return MakeResult<FileSys::VirtualFile>(
@@ -319,15 +322,15 @@ ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId stora
 }
 
 ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
-                                            FileSys::SaveDataDescriptor save_struct) {
+                                            const FileSys::SaveDataDescriptor& descriptor) {
     LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}",
-              static_cast<u8>(space), save_struct.DebugInfo());
+              static_cast<u8>(space), descriptor.DebugInfo());
 
     if (save_data_factory == nullptr) {
         return FileSys::ERROR_ENTITY_NOT_FOUND;
     }
 
-    return save_data_factory->Open(space, save_struct);
+    return save_data_factory->Open(space, descriptor);
 }
 
 ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space) {
@@ -388,11 +391,6 @@ void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id,
         save_data_factory->WriteSaveDataSize(type, title_id, user_id, new_value);
 }
 
-FileSys::RegisteredCacheUnion GetUnionContents() {
-    return FileSys::RegisteredCacheUnion{
-        {GetSystemNANDContents(), GetUserNANDContents(), GetSDMCContents()}};
-}
-
 FileSys::RegisteredCache* GetSystemNANDContents() {
     LOG_TRACE(Service_FS, "Opening System NAND Contents");
 
@@ -457,6 +455,10 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
     if (bis_factory == nullptr) {
         bis_factory =
             std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory);
+        Core::System::GetInstance().RegisterContentProvider(
+            FileSys::ContentProviderUnionSlot::SysNAND, bis_factory->GetSystemNANDContents());
+        Core::System::GetInstance().RegisterContentProvider(
+            FileSys::ContentProviderUnionSlot::UserNAND, bis_factory->GetUserNANDContents());
     }
 
     if (save_data_factory == nullptr) {
@@ -465,6 +467,8 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
 
     if (sdmc_factory == nullptr) {
         sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory));
+        Core::System::GetInstance().RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
+                                                            sdmc_factory->GetSDMCContents());
     }
 }
 
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 6fd5e7b23..6481f237c 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -46,7 +46,7 @@ ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess();
 ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id,
                                           FileSys::ContentRecordType type);
 ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
-                                            FileSys::SaveDataDescriptor save_struct);
+                                            const FileSys::SaveDataDescriptor& descriptor);
 ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space);
 ResultVal<FileSys::VirtualDir> OpenSDMC();
 
@@ -54,8 +54,6 @@ FileSys::SaveDataSize ReadSaveDataSize(FileSys::SaveDataType type, u64 title_id,
 void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id,
                        FileSys::SaveDataSize new_value);
 
-FileSys::RegisteredCacheUnion GetUnionContents();
-
 FileSys::RegisteredCache* GetSystemNANDContents();
 FileSys::RegisteredCache* GetUserNANDContents();
 FileSys::RegisteredCache* GetSDMCContents();
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 74c4e583b..657baddb8 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -315,61 +315,53 @@ public:
     void CreateFile(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
 
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        u64 mode = rp.Pop<u64>();
-        u32 size = rp.Pop<u32>();
+        const u64 mode = rp.Pop<u64>();
+        const u32 size = rp.Pop<u32>();
 
-        LOG_DEBUG(Service_FS, "called file {} mode 0x{:X} size 0x{:08X}", name, mode, size);
+        LOG_DEBUG(Service_FS, "called. file={}, mode=0x{:X}, size=0x{:08X}", name, mode, size);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.CreateFile(name, size));
     }
 
     void DeleteFile(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called file {}", name);
+        LOG_DEBUG(Service_FS, "called. file={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.DeleteFile(name));
     }
 
     void CreateDirectory(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called directory {}", name);
+        LOG_DEBUG(Service_FS, "called. directory={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.CreateDirectory(name));
     }
 
     void DeleteDirectory(Kernel::HLERequestContext& ctx) {
-        const IPC::RequestParser rp{ctx};
-
         const auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called directory {}", name);
+        LOG_DEBUG(Service_FS, "called. directory={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.DeleteDirectory(name));
     }
 
     void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) {
-        const IPC::RequestParser rp{ctx};
-
         const auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called directory {}", name);
+        LOG_DEBUG(Service_FS, "called. directory={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.DeleteDirectoryRecursively(name));
@@ -386,18 +378,16 @@ public:
     }
 
     void RenameFile(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
         std::vector<u8> buffer;
         buffer.resize(ctx.BufferDescriptorX()[0].Size());
         Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
-        std::string src_name = Common::StringFromBuffer(buffer);
+        const std::string src_name = Common::StringFromBuffer(buffer);
 
         buffer.resize(ctx.BufferDescriptorX()[1].Size());
         Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
-        std::string dst_name = Common::StringFromBuffer(buffer);
+        const std::string dst_name = Common::StringFromBuffer(buffer);
 
-        LOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name);
+        LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", src_name, dst_name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.RenameFile(src_name, dst_name));
@@ -406,12 +396,12 @@ public:
     void OpenFile(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
 
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
+        const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
 
-        LOG_DEBUG(Service_FS, "called file {} mode {}", name, static_cast<u32>(mode));
+        LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, static_cast<u32>(mode));
 
         auto result = backend.OpenFile(name, mode);
         if (result.Failed()) {
@@ -430,13 +420,13 @@ public:
     void OpenDirectory(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
 
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
         // TODO(Subv): Implement this filter.
-        u32 filter_flags = rp.Pop<u32>();
+        const u32 filter_flags = rp.Pop<u32>();
 
-        LOG_DEBUG(Service_FS, "called directory {} filter {}", name, filter_flags);
+        LOG_DEBUG(Service_FS, "called. directory={}, filter={}", name, filter_flags);
 
         auto result = backend.OpenDirectory(name);
         if (result.Failed()) {
@@ -453,12 +443,10 @@ public:
     }
 
     void GetEntryType(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called file {}", name);
+        LOG_DEBUG(Service_FS, "called. file={}", name);
 
         auto result = backend.GetEntryType(name);
         if (result.Failed()) {
@@ -616,7 +604,9 @@ private:
         u64_le save_id;
         u64_le title_id;
         u64_le save_image_size;
-        INSERT_PADDING_BYTES(0x28);
+        u16_le index;
+        FileSys::SaveDataRank rank;
+        INSERT_PADDING_BYTES(0x25);
     };
     static_assert(sizeof(SaveDataInfo) == 0x60, "SaveDataInfo has incorrect size.");
 
@@ -627,8 +617,8 @@ private:
 FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
     // clang-format off
     static const FunctionInfo functions[] = {
-        {0, nullptr, "MountContent"},
-        {1, &FSP_SRV::Initialize, "Initialize"},
+        {0, nullptr, "OpenFileSystem"},
+        {1, &FSP_SRV::SetCurrentProcess, "SetCurrentProcess"},
         {2, nullptr, "OpenDataFileSystemByCurrentProcess"},
         {7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"},
         {8, nullptr, "OpenFileSystemWithId"},
@@ -637,10 +627,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {12, nullptr, "OpenBisStorage"},
         {13, nullptr, "InvalidateBisCache"},
         {17, nullptr, "OpenHostFileSystem"},
-        {18, &FSP_SRV::MountSdCard, "MountSdCard"},
+        {18, &FSP_SRV::OpenSdCardFileSystem, "OpenSdCardFileSystem"},
         {19, nullptr, "FormatSdCardFileSystem"},
         {21, nullptr, "DeleteSaveDataFileSystem"},
-        {22, &FSP_SRV::CreateSaveData, "CreateSaveData"},
+        {22, &FSP_SRV::CreateSaveDataFileSystem, "CreateSaveDataFileSystem"},
         {23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"},
         {24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"},
         {25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"},
@@ -652,7 +642,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {32, nullptr, "ExtendSaveDataFileSystem"},
         {33, nullptr, "DeleteCacheStorage"},
         {34, nullptr, "GetCacheStorageSize"},
-        {51, &FSP_SRV::MountSaveData, "MountSaveData"},
+        {35, nullptr, "CreateSaveDataFileSystemByHashSalt"},
+        {51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"},
         {52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"},
         {53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"},
         {57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"},
@@ -664,21 +655,26 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {64, nullptr, "OpenSaveDataInternalStorageFileSystem"},
         {65, nullptr, "UpdateSaveDataMacForDebug"},
         {66, nullptr, "WriteSaveDataFileSystemExtraData2"},
+        {67, nullptr, "FindSaveDataWithFilter"},
+        {68, nullptr, "OpenSaveDataInfoReaderBySaveDataFilter"},
         {80, nullptr, "OpenSaveDataMetaFile"},
         {81, nullptr, "OpenSaveDataTransferManager"},
         {82, nullptr, "OpenSaveDataTransferManagerVersion2"},
         {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"},
+        {84, nullptr, "ListApplicationAccessibleSaveDataOwnerId"},
         {100, nullptr, "OpenImageDirectoryFileSystem"},
         {110, nullptr, "OpenContentStorageFileSystem"},
+        {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"},
         {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"},
         {201, nullptr, "OpenDataStorageByProgramId"},
         {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"},
-        {203, &FSP_SRV::OpenRomStorage, "OpenRomStorage"},
+        {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"},
         {400, nullptr, "OpenDeviceOperator"},
         {500, nullptr, "OpenSdCardDetectionEventNotifier"},
         {501, nullptr, "OpenGameCardDetectionEventNotifier"},
         {510, nullptr, "OpenSystemDataUpdateEventNotifier"},
         {511, nullptr, "NotifySystemDataUpdateEvent"},
+        {520, nullptr, "SimulateGameCardDetectionEvent"},
         {600, nullptr, "SetCurrentPosixTime"},
         {601, nullptr, "QuerySaveDataTotalSize"},
         {602, nullptr, "VerifySaveDataFileSystem"},
@@ -717,6 +713,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {1008, nullptr, "OpenRegisteredUpdatePartition"},
         {1009, nullptr, "GetAndClearMemoryReportInfo"},
         {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
+        {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
+        {1200, nullptr, "OpenMultiCommitManager"},
     };
     // clang-format on
     RegisterHandlers(functions);
@@ -724,8 +722,11 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
 
 FSP_SRV::~FSP_SRV() = default;
 
-void FSP_SRV::Initialize(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_FS, "(STUBBED) called");
+void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    current_process_id = rp.Pop<u64>();
+
+    LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
@@ -743,7 +744,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
     rb.Push(ResultCode(-1));
 }
 
-void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_FS, "called");
 
     IFileSystem filesystem(OpenSDMC().Unwrap());
@@ -753,7 +754,7 @@ void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
     rb.PushIpcInterface<IFileSystem>(std::move(filesystem));
 }
 
-void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
     auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
@@ -767,17 +768,18 @@ void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) {
     rb.Push(RESULT_SUCCESS);
 }
 
-void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
-
-    auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>();
-    auto unk = rp.Pop<u32>();
-    LOG_INFO(Service_FS, "called with unknown={:08X}", unk);
+void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
+    LOG_INFO(Service_FS, "called.");
 
-    auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
+    struct Parameters {
+        FileSys::SaveDataSpaceId save_data_space_id;
+        FileSys::SaveDataDescriptor descriptor;
+    };
 
-    auto dir = OpenSaveData(space_id, save_struct);
+    IPC::RequestParser rp{ctx};
+    const auto parameters = rp.PopRaw<Parameters>();
 
+    auto dir = OpenSaveData(parameters.save_data_space_id, parameters.descriptor);
     if (dir.Failed()) {
         IPC::ResponseBuilder rb{ctx, 2, 0, 0};
         rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);
@@ -793,7 +795,7 @@ void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
 
 void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem");
-    MountSaveData(ctx);
+    OpenSaveDataFileSystem(ctx);
 }
 
 void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) {
@@ -881,7 +883,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
     rb.PushIpcInterface<IStorage>(std::move(storage));
 }
 
-void FSP_SRV::OpenRomStorage(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
     auto storage_id = rp.PopRaw<FileSys::StorageId>();
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index e7abec0a3..d7572ba7a 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -19,19 +19,20 @@ public:
     ~FSP_SRV() override;
 
 private:
-    void Initialize(Kernel::HLERequestContext& ctx);
+    void SetCurrentProcess(Kernel::HLERequestContext& ctx);
     void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx);
-    void MountSdCard(Kernel::HLERequestContext& ctx);
-    void CreateSaveData(Kernel::HLERequestContext& ctx);
-    void MountSaveData(Kernel::HLERequestContext& ctx);
+    void OpenSdCardFileSystem(Kernel::HLERequestContext& ctx);
+    void CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx);
+    void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx);
     void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
     void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx);
     void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
     void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
     void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
-    void OpenRomStorage(Kernel::HLERequestContext& ctx);
+    void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
 
     FileSys::VirtualFile romfs;
+    u64 current_process_id = 0;
 };
 
 } // namespace Service::FileSystem
diff --git a/src/core/hle/service/hid/controllers/controller_base.h b/src/core/hle/service/hid/controllers/controller_base.h
index f0e092b1b..5e5097a03 100644
--- a/src/core/hle/service/hid/controllers/controller_base.h
+++ b/src/core/hle/service/hid/controllers/controller_base.h
@@ -7,6 +7,10 @@
 #include "common/common_types.h"
 #include "common/swap.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Service::HID {
 class ControllerBase {
 public:
@@ -20,7 +24,8 @@ public:
     virtual void OnRelease() = 0;
 
     // When the controller is requesting an update for the shared memory
-    virtual void OnUpdate(u8* data, std::size_t size) = 0;
+    virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                          std::size_t size) = 0;
 
     // Called when input devices should be loaded
     virtual void OnLoadInputDevices() = 0;
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index c22357d8c..c5c2e032a 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}
 
 void Controller_DebugPad::OnRelease() {}
 
-void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                                   std::size_t size) {
+    shared_memory.header.timestamp = core_timing.GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/debug_pad.h b/src/core/hle/service/hid/controllers/debug_pad.h
index 68b734248..e584b92ec 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.h
+++ b/src/core/hle/service/hid/controllers/debug_pad.h
@@ -26,7 +26,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
@@ -41,20 +41,20 @@ private:
     struct PadState {
         union {
             u32_le raw{};
-            BitField<0, 1, u32_le> a;
-            BitField<1, 1, u32_le> b;
-            BitField<2, 1, u32_le> x;
-            BitField<3, 1, u32_le> y;
-            BitField<4, 1, u32_le> l;
-            BitField<5, 1, u32_le> r;
-            BitField<6, 1, u32_le> zl;
-            BitField<7, 1, u32_le> zr;
-            BitField<8, 1, u32_le> plus;
-            BitField<9, 1, u32_le> minus;
-            BitField<10, 1, u32_le> d_left;
-            BitField<11, 1, u32_le> d_up;
-            BitField<12, 1, u32_le> d_right;
-            BitField<13, 1, u32_le> d_down;
+            BitField<0, 1, u32> a;
+            BitField<1, 1, u32> b;
+            BitField<2, 1, u32> x;
+            BitField<3, 1, u32> y;
+            BitField<4, 1, u32> l;
+            BitField<5, 1, u32> r;
+            BitField<6, 1, u32> zl;
+            BitField<7, 1, u32> zr;
+            BitField<8, 1, u32> plus;
+            BitField<9, 1, u32> minus;
+            BitField<10, 1, u32> d_left;
+            BitField<11, 1, u32> d_up;
+            BitField<12, 1, u32> d_right;
+            BitField<13, 1, u32> d_down;
         };
     };
     static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size");
@@ -62,7 +62,7 @@ private:
     struct Attributes {
         union {
             u32_le raw{};
-            BitField<0, 1, u32_le> connected;
+            BitField<0, 1, u32> connected;
         };
     };
     static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 898572277..a179252e3 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}
 
 void Controller_Gesture::OnRelease() {}
 
-void Controller_Gesture::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                                  std::size_t size) {
+    shared_memory.header.timestamp = core_timing.GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.h b/src/core/hle/service/hid/controllers/gesture.h
index 1056ffbcd..f305fe90f 100644
--- a/src/core/hle/service/hid/controllers/gesture.h
+++ b/src/core/hle/service/hid/controllers/gesture.h
@@ -22,7 +22,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index ca75adc2b..92d7bfb52 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}
 
 void Controller_Keyboard::OnRelease() {}
 
-void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                                   std::size_t size) {
+    shared_memory.header.timestamp = core_timing.GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.h b/src/core/hle/service/hid/controllers/keyboard.h
index f52775456..73cd2c7bb 100644
--- a/src/core/hle/service/hid/controllers/keyboard.h
+++ b/src/core/hle/service/hid/controllers/keyboard.h
@@ -25,7 +25,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 63391dbe9..11ab096d9 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
 void Controller_Mouse::OnInit() {}
 void Controller_Mouse::OnRelease() {}
 
-void Controller_Mouse::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                                std::size_t size) {
+    shared_memory.header.timestamp = core_timing.GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.h b/src/core/hle/service/hid/controllers/mouse.h
index 70b654d07..9d46eecbe 100644
--- a/src/core/hle/service/hid/controllers/mouse.h
+++ b/src/core/hle/service/hid/controllers/mouse.h
@@ -24,7 +24,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 04c8c35a8..e7fc7a619 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
     rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
 }
 
-void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
+void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                               std::size_t data_len) {
     if (!IsControllerActivated())
         return;
     for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
             const auto& last_entry =
                 main_controller->npad[main_controller->common.last_entry_index];
 
-            main_controller->common.timestamp = CoreTiming::GetTicks();
+            main_controller->common.timestamp = core_timing.GetTicks();
             main_controller->common.last_entry_index =
                 (main_controller->common.last_entry_index + 1) % 17;
 
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 106cf58c8..4ff50b3cd 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -30,7 +30,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
@@ -39,13 +39,13 @@ public:
         union {
             u32_le raw{};
 
-            BitField<0, 1, u32_le> pro_controller;
-            BitField<1, 1, u32_le> handheld;
-            BitField<2, 1, u32_le> joycon_dual;
-            BitField<3, 1, u32_le> joycon_left;
-            BitField<4, 1, u32_le> joycon_right;
+            BitField<0, 1, u32> pro_controller;
+            BitField<1, 1, u32> handheld;
+            BitField<2, 1, u32> joycon_dual;
+            BitField<3, 1, u32> joycon_left;
+            BitField<4, 1, u32> joycon_right;
 
-            BitField<6, 1, u32_le> pokeball; // TODO(ogniK): Confirm when possible
+            BitField<6, 1, u32> pokeball; // TODO(ogniK): Confirm when possible
         };
     };
     static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size");
@@ -150,43 +150,43 @@ private:
         union {
             u64_le raw{};
             // Button states
-            BitField<0, 1, u64_le> a;
-            BitField<1, 1, u64_le> b;
-            BitField<2, 1, u64_le> x;
-            BitField<3, 1, u64_le> y;
-            BitField<4, 1, u64_le> l_stick;
-            BitField<5, 1, u64_le> r_stick;
-            BitField<6, 1, u64_le> l;
-            BitField<7, 1, u64_le> r;
-            BitField<8, 1, u64_le> zl;
-            BitField<9, 1, u64_le> zr;
-            BitField<10, 1, u64_le> plus;
-            BitField<11, 1, u64_le> minus;
+            BitField<0, 1, u64> a;
+            BitField<1, 1, u64> b;
+            BitField<2, 1, u64> x;
+            BitField<3, 1, u64> y;
+            BitField<4, 1, u64> l_stick;
+            BitField<5, 1, u64> r_stick;
+            BitField<6, 1, u64> l;
+            BitField<7, 1, u64> r;
+            BitField<8, 1, u64> zl;
+            BitField<9, 1, u64> zr;
+            BitField<10, 1, u64> plus;
+            BitField<11, 1, u64> minus;
 
             // D-Pad
-            BitField<12, 1, u64_le> d_left;
-            BitField<13, 1, u64_le> d_up;
-            BitField<14, 1, u64_le> d_right;
-            BitField<15, 1, u64_le> d_down;
+            BitField<12, 1, u64> d_left;
+            BitField<13, 1, u64> d_up;
+            BitField<14, 1, u64> d_right;
+            BitField<15, 1, u64> d_down;
 
             // Left JoyStick
-            BitField<16, 1, u64_le> l_stick_left;
-            BitField<17, 1, u64_le> l_stick_up;
-            BitField<18, 1, u64_le> l_stick_right;
-            BitField<19, 1, u64_le> l_stick_down;
+            BitField<16, 1, u64> l_stick_left;
+            BitField<17, 1, u64> l_stick_up;
+            BitField<18, 1, u64> l_stick_right;
+            BitField<19, 1, u64> l_stick_down;
 
             // Right JoyStick
-            BitField<20, 1, u64_le> r_stick_left;
-            BitField<21, 1, u64_le> r_stick_up;
-            BitField<22, 1, u64_le> r_stick_right;
-            BitField<23, 1, u64_le> r_stick_down;
+            BitField<20, 1, u64> r_stick_left;
+            BitField<21, 1, u64> r_stick_up;
+            BitField<22, 1, u64> r_stick_right;
+            BitField<23, 1, u64> r_stick_down;
 
             // Not always active?
-            BitField<24, 1, u64_le> left_sl;
-            BitField<25, 1, u64_le> left_sr;
+            BitField<24, 1, u64> left_sl;
+            BitField<25, 1, u64> left_sr;
 
-            BitField<26, 1, u64_le> right_sl;
-            BitField<27, 1, u64_le> right_sr;
+            BitField<26, 1, u64> right_sl;
+            BitField<27, 1, u64> right_sr;
         };
     };
     static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size");
@@ -200,12 +200,12 @@ private:
     struct ConnectionState {
         union {
             u32_le raw{};
-            BitField<0, 1, u32_le> IsConnected;
-            BitField<1, 1, u32_le> IsWired;
-            BitField<2, 1, u32_le> IsLeftJoyConnected;
-            BitField<3, 1, u32_le> IsLeftJoyWired;
-            BitField<4, 1, u32_le> IsRightJoyConnected;
-            BitField<5, 1, u32_le> IsRightJoyWired;
+            BitField<0, 1, u32> IsConnected;
+            BitField<1, 1, u32> IsWired;
+            BitField<2, 1, u32> IsLeftJoyConnected;
+            BitField<3, 1, u32> IsLeftJoyWired;
+            BitField<4, 1, u32> IsRightJoyConnected;
+            BitField<5, 1, u32> IsRightJoyWired;
         };
     };
     static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size");
@@ -240,23 +240,23 @@ private:
     struct NPadProperties {
         union {
             s64_le raw{};
-            BitField<11, 1, s64_le> is_vertical;
-            BitField<12, 1, s64_le> is_horizontal;
-            BitField<13, 1, s64_le> use_plus;
-            BitField<14, 1, s64_le> use_minus;
+            BitField<11, 1, s64> is_vertical;
+            BitField<12, 1, s64> is_horizontal;
+            BitField<13, 1, s64> use_plus;
+            BitField<14, 1, s64> use_minus;
         };
     };
 
     struct NPadDevice {
         union {
             u32_le raw{};
-            BitField<0, 1, s32_le> pro_controller;
-            BitField<1, 1, s32_le> handheld;
-            BitField<2, 1, s32_le> handheld_left;
-            BitField<3, 1, s32_le> handheld_right;
-            BitField<4, 1, s32_le> joycon_left;
-            BitField<5, 1, s32_le> joycon_right;
-            BitField<6, 1, s32_le> pokeball;
+            BitField<0, 1, s32> pro_controller;
+            BitField<1, 1, s32> handheld;
+            BitField<2, 1, s32> handheld_left;
+            BitField<3, 1, s32> handheld_right;
+            BitField<4, 1, s32> joycon_left;
+            BitField<5, 1, s32> joycon_right;
+            BitField<6, 1, s32> pokeball;
         };
     };
 
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 02fcfadd9..946948f5e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}
 
 void Controller_Stubbed::OnRelease() {}
 
-void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) {
+void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                                  std::size_t size) {
     if (!smart_update) {
         return;
     }
 
     CommonHeader header{};
-    header.timestamp = CoreTiming::GetTicks();
+    header.timestamp = core_timing.GetTicks();
     header.total_entry_count = 17;
     header.entry_count = 0;
     header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/stubbed.h b/src/core/hle/service/hid/controllers/stubbed.h
index 4a21c643e..24469f03e 100644
--- a/src/core/hle/service/hid/controllers/stubbed.h
+++ b/src/core/hle/service/hid/controllers/stubbed.h
@@ -20,7 +20,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index f666b1bd8..1a8445a43 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}
 
 void Controller_Touchscreen::OnRelease() {}
 
-void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
-    shared_memory.header.timestamp = CoreTiming::GetTicks();
+void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                                      std::size_t size) {
+    shared_memory.header.timestamp = core_timing.GetTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
         touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
         touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
         touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
-        const u64 tick = CoreTiming::GetTicks();
+        const u64 tick = core_timing.GetTicks();
         touch_entry.delta_time = tick - last_touch;
         last_touch = tick;
         touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index 94cd0eba9..76fc340e9 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -24,7 +24,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
@@ -33,8 +33,8 @@ private:
     struct Attributes {
         union {
             u32 raw{};
-            BitField<0, 1, u32_le> start_touch;
-            BitField<1, 1, u32_le> end_touch;
+            BitField<0, 1, u32> start_touch;
+            BitField<1, 1, u32> end_touch;
         };
     };
     static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index cd397c70b..1a9da9576 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}
 
 void Controller_XPad::OnRelease() {}
 
-void Controller_XPad::OnUpdate(u8* data, std::size_t size) {
+void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+                               std::size_t size) {
     for (auto& xpad_entry : shared_memory.shared_memory_entries) {
-        xpad_entry.header.timestamp = CoreTiming::GetTicks();
+        xpad_entry.header.timestamp = core_timing.GetTicks();
         xpad_entry.header.total_entry_count = 17;
 
         if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/xpad.h b/src/core/hle/service/hid/controllers/xpad.h
index ff836989f..2864e6617 100644
--- a/src/core/hle/service/hid/controllers/xpad.h
+++ b/src/core/hle/service/hid/controllers/xpad.h
@@ -22,7 +22,7 @@ public:
     void OnRelease() override;
 
     // When the controller is requesting an update for the shared memory
-    void OnUpdate(u8* data, std::size_t size) override;
+    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
 
     // Called when input devices should be loaded
     void OnLoadInputDevices() override;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 008bf3f02..63b55758b 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
 
 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66;
-constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
-constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 accelerometer_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+constexpr s64 gyroscope_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
 
 IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
     GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
 
     // Register update callbacks
+    auto& core_timing = Core::System::GetInstance().CoreTiming();
     pad_update_event =
-        CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
             UpdateControllers(userdata, cycles_late);
         });
 
     // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
 
-    CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
+    core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
 
     ReloadInputDevices();
 }
@@ -94,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
 }
 
 IAppletResource ::~IAppletResource() {
-    CoreTiming::UnscheduleEvent(pad_update_event, 0);
+    Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
 }
 
 void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -105,16 +106,18 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
     rb.PushCopyObjects(shared_mem);
 }
 
-void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
+void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
+    auto& core_timing = Core::System::GetInstance().CoreTiming();
+
     const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
     for (const auto& controller : controllers) {
         if (should_reload) {
             controller->OnLoadInputDevices();
         }
-        controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
+        controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
     }
 
-    CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
+    core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
 }
 
 class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index eca27c056..d3660cad2 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -4,10 +4,13 @@
 
 #pragma once
 
+#include "core/hle/service/hid/controllers/controller_base.h"
+#include "core/hle/service/service.h"
+
 #include "controllers/controller_base.h"
 #include "core/hle/service/service.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 struct EventType;
 }
 
@@ -15,7 +18,7 @@ namespace Kernel {
 class SharedMemory;
 }
 
-namespace SM {
+namespace Service::SM {
 class ServiceManager;
 }
 
@@ -62,11 +65,11 @@ private:
     }
 
     void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
-    void UpdateControllers(u64 userdata, int cycles_late);
+    void UpdateControllers(u64 userdata, s64 cycles_late);
 
     Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
 
-    CoreTiming::EventType* pad_update_event;
+    Core::Timing::EventType* pad_update_event;
 
     std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
         controllers{};
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 3c7f8b1ee..2c4625c99 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
 
     IPC::ResponseBuilder rb{ctx, 5};
     rb.Push(RESULT_SUCCESS);
-    rb.PushRaw<u64>(CoreTiming::GetTicks());
+    rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
     rb.PushRaw<u32>(0);
 }
 
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9df7ac50f..d65693fc7 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -319,15 +319,14 @@ public:
         }
 
         ASSERT(vm_manager
-                   .MirrorMemory(*map_address, nro_addr, nro_size,
-                                 Kernel::MemoryState::ModuleCodeStatic)
+                   .MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
                    .IsSuccess());
         ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
 
         if (bss_size > 0) {
             ASSERT(vm_manager
                        .MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
-                                     Kernel::MemoryState::ModuleCodeStatic)
+                                     Kernel::MemoryState::ModuleCode)
                        .IsSuccess());
             ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
         }
@@ -388,8 +387,7 @@ public:
         const auto& nro_size = iter->second.size;
 
         ASSERT(vm_manager
-                   .MirrorMemory(heap_addr, mapped_addr, nro_size,
-                                 Kernel::MemoryState::ModuleCodeStatic)
+                   .MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
                    .IsSuccess());
         ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());
 
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index 1f462e087..2a61593e2 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -42,7 +42,7 @@ private:
         union {
             BitField<0, 16, Flags> flags;
             BitField<16, 8, Severity> severity;
-            BitField<24, 8, u32_le> verbosity;
+            BitField<24, 8, u32> verbosity;
         };
         u32_le payload_size;
 
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp
index 0297edca0..5d31f638f 100644
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -40,10 +40,10 @@ public:
             {6, nullptr, "CloseContentStorageForcibly"},
             {7, nullptr, "CloseContentMetaDatabaseForcibly"},
             {8, nullptr, "CleanupContentMetaDatabase"},
-            {9, nullptr, "OpenContentStorage2"},
-            {10, nullptr, "CloseContentStorage"},
-            {11, nullptr, "OpenContentMetaDatabase2"},
-            {12, nullptr, "CloseContentMetaDatabase"},
+            {9, nullptr, "ActivateContentStorage"},
+            {10, nullptr, "InactivateContentStorage"},
+            {11, nullptr, "ActivateContentMetaDatabase"},
+            {12, nullptr, "InactivateContentMetaDatabase"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index 5c62d42ba..ca88bf97f 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -150,7 +150,7 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.PushRaw<u8>(Settings::values.enable_nfc);
+        rb.PushRaw<u8>(true);
     }
 
     void GetStateOld(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 1c4482e47..c6babdd4d 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -335,7 +335,7 @@ void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
 }
 
 bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     if (buffer.size() < sizeof(AmiiboFile)) {
         return false;
     }
diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp
index 2663f56b1..0eb04037a 100644
--- a/src/core/hle/service/ns/ns.cpp
+++ b/src/core/hle/service/ns/ns.cpp
@@ -43,7 +43,7 @@ public:
             {11, nullptr, "CalculateApplicationOccupiedSize"},
             {16, nullptr, "PushApplicationRecord"},
             {17, nullptr, "ListApplicationRecordContentMeta"},
-            {19, nullptr, "LaunchApplication"},
+            {19, nullptr, "LaunchApplicationOld"},
             {21, nullptr, "GetApplicationContentPath"},
             {22, nullptr, "TerminateApplication"},
             {23, nullptr, "ResolveApplicationContentPath"},
@@ -96,10 +96,10 @@ public:
             {86, nullptr, "EnableApplicationCrashReport"},
             {87, nullptr, "IsApplicationCrashReportEnabled"},
             {90, nullptr, "BoostSystemMemoryResourceLimit"},
-            {91, nullptr, "Unknown1"},
-            {92, nullptr, "Unknown2"},
+            {91, nullptr, "DeprecatedLaunchApplication"},
+            {92, nullptr, "GetRunningApplicationProgramId"},
             {93, nullptr, "GetMainApplicationProgramIndex"},
-            {94, nullptr, "LaunchApplication2"},
+            {94, nullptr, "LaunchApplication"},
             {95, nullptr, "GetApplicationLaunchInfo"},
             {96, nullptr, "AcquireApplicationLaunchInfo"},
             {97, nullptr, "GetMainApplicationProgramIndex2"},
@@ -163,7 +163,7 @@ public:
             {907, nullptr, "WithdrawApplicationUpdateRequest"},
             {908, nullptr, "ListApplicationRecordInstalledContentMeta"},
             {909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"},
-            {910, nullptr, "Unknown3"},
+            {910, nullptr, "HasApplicationRecord"},
             {911, nullptr, "SetPreInstalledApplication"},
             {912, nullptr, "ClearPreInstalledApplicationFlag"},
             {1000, nullptr, "RequestVerifyApplicationDeprecated"},
@@ -219,10 +219,10 @@ public:
             {2015, nullptr, "CompareSystemDeliveryInfo"},
             {2016, nullptr, "ListNotCommittedContentMeta"},
             {2017, nullptr, "CreateDownloadTask"},
-            {2018, nullptr, "Unknown4"},
-            {2050, nullptr, "Unknown5"},
-            {2100, nullptr, "Unknown6"},
-            {2101, nullptr, "Unknown7"},
+            {2018, nullptr, "GetApplicationDeliveryInfoHash"},
+            {2050, nullptr, "GetApplicationRightsOnClient"},
+            {2100, nullptr, "GetApplicationTerminateResult"},
+            {2101, nullptr, "GetRawApplicationTerminateResult"},
             {2150, nullptr, "CreateRightsEnvironment"},
             {2151, nullptr, "DestroyRightsEnvironment"},
             {2152, nullptr, "ActivateRightsEnvironment"},
@@ -237,10 +237,10 @@ public:
             {2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"},
             {2190, nullptr, "GetRightsEnvironmentHandleForApplication"},
             {2199, nullptr, "GetRightsEnvironmentCountForDebug"},
-            {2200, nullptr, "Unknown8"},
-            {2201, nullptr, "Unknown9"},
-            {2250, nullptr, "Unknown10"},
-            {2300, nullptr, "Unknown11"},
+            {2200, nullptr, "GetGameCardApplicationCopyIdentifier"},
+            {2201, nullptr, "GetInstalledApplicationCopyIdentifier"},
+            {2250, nullptr, "RequestReportActiveELicence"},
+            {2300, nullptr, "ListEventLog"},
         };
         // clang-format on
 
@@ -355,6 +355,7 @@ public:
         static const FunctionInfo functions[] = {
             {21, nullptr, "GetApplicationContentPath"},
             {23, nullptr, "ResolveApplicationContentPath"},
+            {93, nullptr, "GetRunningApplicationProgramId"},
         };
         // clang-format on
 
@@ -389,6 +390,11 @@ public:
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "RequestLinkDevice"},
+            {1, nullptr, "RequestCleanupAllPreInstalledApplications"},
+            {2, nullptr, "RequestCleanupPreInstalledApplication"},
+            {3, nullptr, "RequestSyncRights"},
+            {4, nullptr, "RequestUnlinkDevice"},
+            {5, nullptr, "RequestRevokeAllELicense"},
         };
         // clang-format on
 
@@ -403,7 +409,7 @@ public:
         static const FunctionInfo functions[] = {
             {100, nullptr, "ResetToFactorySettings"},
             {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"},
-            {102, nullptr, "ResetToFactorySettingsForRefurbishment "},
+            {102, nullptr, "ResetToFactorySettingsForRefurbishment"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 0f02a1a18..4f6042b00 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -19,11 +19,11 @@ public:
     virtual ~nvdevice() = default;
     union Ioctl {
         u32_le raw;
-        BitField<0, 8, u32_le> cmd;
-        BitField<8, 8, u32_le> group;
-        BitField<16, 14, u32_le> length;
-        BitField<30, 1, u32_le> is_in;
-        BitField<31, 1, u32_le> is_out;
+        BitField<0, 8, u32> cmd;
+        BitField<8, 8, u32> group;
+        BitField<16, 14, u32> length;
+        BitField<30, 1, u32> is_in;
+        BitField<31, 1, u32> is_out;
     };
 
     /**
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 92acc57b1..20c7c39aa 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,11 +23,11 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
 
 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                         u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
-                        const MathUtil::Rectangle<int>& crop_rect) {
+                        const Common::Rectangle<int>& crop_rect) {
     VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
-    LOG_WARNING(Service,
-                "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
-                addr, offset, width, height, stride, format);
+    LOG_TRACE(Service,
+              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
+              addr, offset, width, height, stride, format);
 
     using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
     const Tegra::FramebufferConfig framebuffer{
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
 
     auto& instance = Core::System::GetInstance();
     instance.GetPerfStats().EndGameFrame();
-    instance.Renderer().SwapBuffers(framebuffer);
+    instance.GPU().SwapBuffers(framebuffer);
 }
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..12f3ef825 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -18,14 +18,14 @@ class nvmap;
 class nvdisp_disp0 final : public nvdevice {
 public:
     explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev);
-    ~nvdisp_disp0();
+    ~nvdisp_disp0() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
 
     /// Performs a screen flip, drawing the buffer pointed to by the handle.
     void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
               NVFlinger::BufferQueue::BufferTransformFlags transform,
-              const MathUtil::Rectangle<int>& crop_rect);
+              const Common::Rectangle<int>& crop_rect);
 
 private:
     std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..af62d33d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
@@ -88,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
     for (const auto& entry : entries) {
         LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
                     entry.offset, entry.nvmap_handle, entry.pages);
-        Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
+        GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
         auto object = nvmap_dev->GetObject(entry.nvmap_handle);
         if (!object) {
             LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -101,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
         u64 size = static_cast<u64>(entry.pages) << 0x10;
         ASSERT(size <= object->size);
 
-        Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
+        GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
         ASSERT(returned == offset);
     }
     std::memcpy(output.data(), entries.data(), output.size());
@@ -172,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
         return 0;
     }
 
-    auto& system_instance = Core::System::GetInstance();
-
-    // Remove this memory region from the rasterizer cache.
-    auto& gpu = system_instance.GPU();
-    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
-    ASSERT(cpu_addr);
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
-
-    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
-
+    params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
+                                                                                  itr->second.size);
     buffer_mappings.erase(itr->second.offset);
 
     std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index d57a54ee8..45812d238 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -5,6 +5,7 @@
 #include <cstring>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
 
     IoctlGetGpuTime params{};
     std::memcpy(&params, input.data(), input.size());
-    params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks());
+    params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
     std::memcpy(output.data(), &params, output.size());
     return 0;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 0a650f36c..8ce7bc7a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
     return 0;
 }
 
-static void PushGPUEntries(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
-    dma_pusher.Push(std::move(entries));
-    dma_pusher.DispatchCalls();
-}
-
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
     if (input.size() < sizeof(IoctlSubmitGpfifo)) {
         UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
                 params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
 
     params.fence_out.id = 0;
     params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
     Memory::ReadBlock(params.address, entries.data(),
                       params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
 
     params.fence_out.id = 0;
     params.fence_out.value = 0;
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index fe311b069..5b4889910 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -17,7 +17,7 @@ namespace Service::Nvidia {
 class NVDRV final : public ServiceFramework<NVDRV> {
 public:
     NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
-    ~NVDRV();
+    ~NVDRV() override;
 
 private:
     void Open(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/nvdrv/nvmemp.h b/src/core/hle/service/nvdrv/nvmemp.h
index 5a4dfc1f9..6eafb1346 100644
--- a/src/core/hle/service/nvdrv/nvmemp.h
+++ b/src/core/hle/service/nvdrv/nvmemp.h
@@ -11,7 +11,7 @@ namespace Service::Nvidia {
 class NVMEMP final : public ServiceFramework<NVMEMP> {
 public:
     NVMEMP();
-    ~NVMEMP();
+    ~NVMEMP() override;
 
 private:
     void Cmd0(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
 }
 
 void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
-                              const MathUtil::Rectangle<int>& crop_rect) {
+                              const Common::Rectangle<int>& crop_rect) {
     auto itr = std::find_if(queue.begin(), queue.end(),
                             [&](const Buffer& buffer) { return buffer.slot == slot; });
     ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index b171f256c..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -13,10 +13,6 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/writable_event.h"
 
-namespace CoreTiming {
-struct EventType;
-}
-
 namespace Service::NVFlinger {
 
 struct IGBPBuffer {
@@ -71,14 +67,14 @@ public:
         Status status = Status::Free;
         IGBPBuffer igbp_buffer;
         BufferTransformFlags transform;
-        MathUtil::Rectangle<int> crop_rect;
+        Common::Rectangle<int> crop_rect;
     };
 
     void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
     std::optional<u32> DequeueBuffer(u32 width, u32 height);
     const IGBPBuffer& RequestBuffer(u32 slot) const;
     void QueueBuffer(u32 slot, BufferTransformFlags transform,
-                     const MathUtil::Rectangle<int>& crop_rect);
+                     const Common::Rectangle<int>& crop_rect);
     std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
     void ReleaseBuffer(u32 slot);
     u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 6a613aeab..c7f5bbf28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -5,7 +5,6 @@
 #include <algorithm>
 #include <optional>
 
-#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
@@ -15,124 +14,170 @@
 #include "core/core_timing_util.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
+#include "core/hle/service/vi/display/vi_display.h"
+#include "core/hle/service/vi/layer/vi_layer.h"
 #include "core/perf_stats.h"
 #include "video_core/renderer_base.h"
-#include "video_core/video_core.h"
 
 namespace Service::NVFlinger {
 
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 
-NVFlinger::NVFlinger() {
-    // Add the different displays to the list of displays.
+NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
     displays.emplace_back(0, "Default");
     displays.emplace_back(1, "External");
     displays.emplace_back(2, "Edid");
     displays.emplace_back(3, "Internal");
+    displays.emplace_back(4, "Null");
 
     // Schedule the screen composition events
     composition_event =
-        CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
             Compose();
-            CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event);
+            this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
         });
 
-    CoreTiming::ScheduleEvent(frame_ticks, composition_event);
+    core_timing.ScheduleEvent(frame_ticks, composition_event);
 }
 
 NVFlinger::~NVFlinger() {
-    CoreTiming::UnscheduleEvent(composition_event, 0);
+    core_timing.UnscheduleEvent(composition_event, 0);
 }
 
 void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
     nvdrv = std::move(instance);
 }
 
-u64 NVFlinger::OpenDisplay(std::string_view name) {
-    LOG_WARNING(Service, "Opening display {}", name);
+std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
+    LOG_DEBUG(Service, "Opening \"{}\" display", name);
 
     // TODO(Subv): Currently we only support the Default display.
     ASSERT(name == "Default");
 
-    auto itr = std::find_if(displays.begin(), displays.end(),
-                            [&](const Display& display) { return display.name == name; });
-
-    ASSERT(itr != displays.end());
+    const auto itr =
+        std::find_if(displays.begin(), displays.end(),
+                     [&](const VI::Display& display) { return display.GetName() == name; });
+    if (itr == displays.end()) {
+        return {};
+    }
 
-    return itr->id;
+    return itr->GetID();
 }
 
-u64 NVFlinger::CreateLayer(u64 display_id) {
-    auto& display = GetDisplay(display_id);
+std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
+    auto* const display = FindDisplay(display_id);
 
-    ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment");
+    if (display == nullptr) {
+        return {};
+    }
 
-    u64 layer_id = next_layer_id++;
-    u32 buffer_queue_id = next_buffer_queue_id++;
-    auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
-    display.layers.emplace_back(layer_id, buffer_queue);
-    buffer_queues.emplace_back(std::move(buffer_queue));
+    const u64 layer_id = next_layer_id++;
+    const u32 buffer_queue_id = next_buffer_queue_id++;
+    buffer_queues.emplace_back(buffer_queue_id, layer_id);
+    display->CreateLayer(layer_id, buffer_queues.back());
     return layer_id;
 }
 
-u32 NVFlinger::GetBufferQueueId(u64 display_id, u64 layer_id) {
-    const auto& layer = GetLayer(display_id, layer_id);
-    return layer.buffer_queue->GetId();
+std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
+    const auto* const layer = FindLayer(display_id, layer_id);
+
+    if (layer == nullptr) {
+        return {};
+    }
+
+    return layer->GetBufferQueue().GetId();
 }
 
-Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) {
-    return GetDisplay(display_id).vsync_event.readable;
+Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
+    auto* const display = FindDisplay(display_id);
+
+    if (display == nullptr) {
+        return nullptr;
+    }
+
+    return display->GetVSyncEvent();
 }
 
-std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const {
-    auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                            [&](const auto& queue) { return queue->GetId() == id; });
+BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
+    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
+                                  [id](const auto& queue) { return queue.GetId() == id; });
 
     ASSERT(itr != buffer_queues.end());
     return *itr;
 }
 
-Display& NVFlinger::GetDisplay(u64 display_id) {
-    auto itr = std::find_if(displays.begin(), displays.end(),
-                            [&](const Display& display) { return display.id == display_id; });
+const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
+    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
+                                  [id](const auto& queue) { return queue.GetId() == id; });
 
-    ASSERT(itr != displays.end());
+    ASSERT(itr != buffer_queues.end());
     return *itr;
 }
 
-Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) {
-    auto& display = GetDisplay(display_id);
+VI::Display* NVFlinger::FindDisplay(u64 display_id) {
+    const auto itr =
+        std::find_if(displays.begin(), displays.end(),
+                     [&](const VI::Display& display) { return display.GetID() == display_id; });
 
-    auto itr = std::find_if(display.layers.begin(), display.layers.end(),
-                            [&](const Layer& layer) { return layer.id == layer_id; });
+    if (itr == displays.end()) {
+        return nullptr;
+    }
 
-    ASSERT(itr != display.layers.end());
-    return *itr;
+    return &*itr;
+}
+
+const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
+    const auto itr =
+        std::find_if(displays.begin(), displays.end(),
+                     [&](const VI::Display& display) { return display.GetID() == display_id; });
+
+    if (itr == displays.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
+}
+
+VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
+    auto* const display = FindDisplay(display_id);
+
+    if (display == nullptr) {
+        return nullptr;
+    }
+
+    return display->FindLayer(layer_id);
+}
+
+const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
+    const auto* const display = FindDisplay(display_id);
+
+    if (display == nullptr) {
+        return nullptr;
+    }
+
+    return display->FindLayer(layer_id);
 }
 
 void NVFlinger::Compose() {
     for (auto& display : displays) {
         // Trigger vsync for this display at the end of drawing
-        SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
+        SCOPE_EXIT({ display.SignalVSyncEvent(); });
 
         // Don't do anything for displays without layers.
-        if (display.layers.empty())
+        if (!display.HasLayers())
             continue;
 
         // TODO(Subv): Support more than 1 layer.
-        ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
-
-        Layer& layer = display.layers[0];
-        auto& buffer_queue = layer.buffer_queue;
+        VI::Layer& layer = display.GetLayer(0);
+        auto& buffer_queue = layer.GetBufferQueue();
 
         // Search for a queued buffer and acquire it
-        auto buffer = buffer_queue->AcquireBuffer();
+        auto buffer = buffer_queue.AcquireBuffer();
 
         MicroProfileFlip();
 
@@ -141,11 +186,11 @@ void NVFlinger::Compose() {
 
             // There was no queued buffer to draw, render previous frame
             system_instance.GetPerfStats().EndGameFrame();
-            system_instance.Renderer().SwapBuffers({});
+            system_instance.GPU().SwapBuffers({});
             continue;
         }
 
-        auto& igbp_buffer = buffer->get().igbp_buffer;
+        const auto& igbp_buffer = buffer->get().igbp_buffer;
 
         // Now send the buffer to the GPU for drawing.
         // TODO(Subv): Support more than just disp0. The display device selection is probably based
@@ -157,19 +202,8 @@ void NVFlinger::Compose() {
                      igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
                      buffer->get().transform, buffer->get().crop_rect);
 
-        buffer_queue->ReleaseBuffer(buffer->get().slot);
+        buffer_queue.ReleaseBuffer(buffer->get().slot);
     }
 }
 
-Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
-Layer::~Layer() = default;
-
-Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
-    auto& kernel = Core::System::GetInstance().Kernel();
-    vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
-                                                         fmt::format("Display VSync Event {}", id));
-}
-
-Display::~Display() = default;
-
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 9abba555b..c0a83fffb 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <memory>
+#include <optional>
 #include <string>
 #include <string_view>
 #include <vector>
@@ -12,9 +13,10 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace Kernel {
 class ReadableEvent;
@@ -23,69 +25,72 @@ class WritableEvent;
 
 namespace Service::Nvidia {
 class Module;
-}
+} // namespace Service::Nvidia
+
+namespace Service::VI {
+class Display;
+class Layer;
+} // namespace Service::VI
 
 namespace Service::NVFlinger {
 
 class BufferQueue;
 
-struct Layer {
-    Layer(u64 id, std::shared_ptr<BufferQueue> queue);
-    ~Layer();
-
-    u64 id;
-    std::shared_ptr<BufferQueue> buffer_queue;
-};
-
-struct Display {
-    Display(u64 id, std::string name);
-    ~Display();
-
-    u64 id;
-    std::string name;
-
-    std::vector<Layer> layers;
-    Kernel::EventPair vsync_event;
-};
-
 class NVFlinger final {
 public:
-    NVFlinger();
+    explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
     ~NVFlinger();
 
     /// Sets the NVDrv module instance to use to send buffers to the GPU.
     void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);
 
-    /// Opens the specified display and returns the id.
-    u64 OpenDisplay(std::string_view name);
+    /// Opens the specified display and returns the ID.
+    ///
+    /// If an invalid display name is provided, then an empty optional is returned.
+    std::optional<u64> OpenDisplay(std::string_view name);
 
-    /// Creates a layer on the specified display and returns the layer id.
-    u64 CreateLayer(u64 display_id);
+    /// Creates a layer on the specified display and returns the layer ID.
+    ///
+    /// If an invalid display ID is specified, then an empty optional is returned.
+    std::optional<u64> CreateLayer(u64 display_id);
 
-    /// Gets the buffer queue id of the specified layer in the specified display.
-    u32 GetBufferQueueId(u64 display_id, u64 layer_id);
+    /// Finds the buffer queue ID of the specified layer in the specified display.
+    ///
+    /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
+    std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;
 
     /// Gets the vsync event for the specified display.
-    Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id);
+    ///
+    /// If an invalid display ID is provided, then nullptr is returned.
+    Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
+
+    /// Obtains a buffer queue identified by the ID.
+    BufferQueue& FindBufferQueue(u32 id);
 
-    /// Obtains a buffer queue identified by the id.
-    std::shared_ptr<BufferQueue> GetBufferQueue(u32 id) const;
+    /// Obtains a buffer queue identified by the ID.
+    const BufferQueue& FindBufferQueue(u32 id) const;
 
     /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
     /// finished.
     void Compose();
 
 private:
-    /// Returns the display identified by the specified id.
-    Display& GetDisplay(u64 display_id);
+    /// Finds the display identified by the specified ID.
+    VI::Display* FindDisplay(u64 display_id);
 
-    /// Returns the layer identified by the specified id in the desired display.
-    Layer& GetLayer(u64 display_id, u64 layer_id);
+    /// Finds the display identified by the specified ID.
+    const VI::Display* FindDisplay(u64 display_id) const;
+
+    /// Finds the layer identified by the specified ID in the desired display.
+    VI::Layer* FindLayer(u64 display_id, u64 layer_id);
+
+    /// Finds the layer identified by the specified ID in the desired display.
+    const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
 
     std::shared_ptr<Nvidia::Module> nvdrv;
 
-    std::vector<Display> displays;
-    std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
+    std::vector<VI::Display> displays;
+    std::vector<BufferQueue> buffer_queues;
 
     /// Id to use for the next layer that is created, this counter is shared among all displays.
     u64 next_layer_id = 1;
@@ -93,8 +98,11 @@ private:
     /// layers.
     u32 next_buffer_queue_id = 1;
 
-    /// CoreTiming event that handles screen composition.
-    CoreTiming::EventType* composition_event;
+    /// Event that handles screen composition.
+    Core::Timing::EventType* composition_event;
+
+    /// Core timing instance for registering/unregistering the composition event.
+    Core::Timing::CoreTiming& core_timing;
 };
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp
index 53e7da9c3..6b27dc4a3 100644
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -13,7 +13,7 @@ public:
     explicit BootMode() : ServiceFramework{"pm:bm"} {
         static const FunctionInfo functions[] = {
             {0, &BootMode::GetBootMode, "GetBootMode"},
-            {1, nullptr, "SetMaintenanceBoot"},
+            {1, &BootMode::SetMaintenanceBoot, "SetMaintenanceBoot"},
         };
         RegisterHandlers(functions);
     }
@@ -24,8 +24,19 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u32>(static_cast<u32>(SystemBootMode::Normal)); // Normal boot mode
+        rb.PushEnum(boot_mode);
     }
+
+    void SetMaintenanceBoot(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_PM, "called");
+
+        boot_mode = SystemBootMode::Maintenance;
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
+    SystemBootMode boot_mode = SystemBootMode::Normal;
 };
 
 class DebugMonitor final : public ServiceFramework<DebugMonitor> {
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h
index 370f2ed72..cc8d3f215 100644
--- a/src/core/hle/service/pm/pm.h
+++ b/src/core/hle/service/pm/pm.h
@@ -9,7 +9,12 @@ class ServiceManager;
 }
 
 namespace Service::PM {
-enum class SystemBootMode : u32 { Normal = 0, Maintenance = 1 };
+
+enum class SystemBootMode {
+    Normal,
+    Maintenance,
+};
+
 /// Registers all PM services with the specified service manager.
 void InstallInterfaces(SM::ServiceManager& service_manager);
 
diff --git a/src/core/hle/service/psc/psc.cpp b/src/core/hle/service/psc/psc.cpp
index 0ba0a4076..53ec6b031 100644
--- a/src/core/hle/service/psc/psc.cpp
+++ b/src/core/hle/service/psc/psc.cpp
@@ -17,13 +17,13 @@ public:
     explicit PSC_C() : ServiceFramework{"psc:c"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "Unknown1"},
-            {1, nullptr, "Unknown2"},
-            {2, nullptr, "Unknown3"},
-            {3, nullptr, "Unknown4"},
-            {4, nullptr, "Unknown5"},
-            {5, nullptr, "Unknown6"},
-            {6, nullptr, "Unknown7"},
+            {0, nullptr, "Initialize"},
+            {1, nullptr, "DispatchRequest"},
+            {2, nullptr, "GetResult"},
+            {3, nullptr, "GetState"},
+            {4, nullptr, "Cancel"},
+            {5, nullptr, "PrintModuleInformation"},
+            {6, nullptr, "GetModuleInformation"},
         };
         // clang-format on
 
@@ -39,7 +39,8 @@ public:
             {0, nullptr, "Initialize"},
             {1, nullptr, "GetRequest"},
             {2, nullptr, "Acknowledge"},
-            {3, nullptr, "Unknown1"},
+            {3, nullptr, "Finalize"},
+            {4, nullptr, "AcknowledgeEx"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index d25b80ab0..00806b0ed 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -11,7 +11,6 @@
 #include "core/hle/ipc.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_port.h"
@@ -76,7 +75,8 @@ namespace Service {
  * Creates a function string for logging, complete with the name (or header code, depending
  * on what's passed in) the port name, and all the cmd_buff arguments.
  */
-[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name,
+[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
+                                                       std::string_view port_name,
                                                        const u32* cmd_buff) {
     // Number of params == bits 0-5 + bits 6-11
     int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +158,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
         return ReportUnimplementedFunction(ctx, info);
     }
 
-    LOG_TRACE(
-        Service, "{}",
-        MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
+    LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
     handler_invoker(this, info->handler_callback, ctx);
 }
 
@@ -169,7 +167,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
     case IPC::CommandType::Close: {
         IPC::ResponseBuilder rb{context, 2};
         rb.Push(RESULT_SUCCESS);
-        return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead);
+        return IPC::ERR_REMOTE_PROCESS_DEAD;
     }
     case IPC::CommandType::ControlWithContext:
     case IPC::CommandType::Control: {
@@ -194,10 +192,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
 // Module interface
 
 /// Initialize ServiceManager
-void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) {
+void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
+          FileSys::VfsFilesystem& vfs) {
     // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
     // here and pass it into the respective InstallInterfaces functions.
-    auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>();
+    auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
 
     SM::ServiceManager::InstallInterfaces(sm);
 
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 029533628..abbfe5524 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -14,6 +14,14 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Namespace Service
 
+namespace Core {
+class System;
+}
+
+namespace FileSys {
+class VfsFilesystem;
+}
+
 namespace Kernel {
 class ClientPort;
 class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
 class HLERequestContext;
 } // namespace Kernel
 
-namespace FileSys {
-class VfsFilesystem;
-}
-
 namespace Service {
 
 namespace SM {
@@ -86,7 +90,7 @@ private:
                            Kernel::HLERequestContext& ctx);
 
     ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker);
-    ~ServiceFrameworkBase();
+    ~ServiceFrameworkBase() override;
 
     void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
     void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
@@ -178,7 +182,8 @@ private:
 };
 
 /// Initialize ServiceManager
-void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs);
+void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
+          FileSys::VfsFilesystem& vfs);
 
 /// Shutdown ServiceManager
 void Shutdown();
diff --git a/src/core/hle/service/set/set_cal.h b/src/core/hle/service/set/set_cal.h
index 583036eac..a0677e815 100644
--- a/src/core/hle/service/set/set_cal.h
+++ b/src/core/hle/service/set/set_cal.h
@@ -11,7 +11,7 @@ namespace Service::Set {
 class SET_CAL final : public ServiceFramework<SET_CAL> {
 public:
     explicit SET_CAL();
-    ~SET_CAL();
+    ~SET_CAL() override;
 };
 
 } // namespace Service::Set
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index c9b4da5b0..ecee554bf 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -2,13 +2,88 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/file_sys/errors.h"
+#include "core/file_sys/system_archive/system_version.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
+#include "core/hle/service/filesystem/filesystem.h"
 #include "core/hle/service/set/set_sys.h"
 
 namespace Service::Set {
 
+namespace {
+constexpr u64 SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET = 0x05;
+
+enum class GetFirmwareVersionType {
+    Version1,
+    Version2,
+};
+
+void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionType type) {
+    LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'",
+                FileSys::SystemArchive::GetLongDisplayVersion());
+
+    ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100,
+               "FirmwareVersion output buffer must be 0x100 bytes in size!");
+
+    // The normal procedure would be to look for the real system archive and synthesize one only
+    // if it doesn't exist. That could lead to strange bugs when a user has a really old or
+    // really new SystemVersion title installed, so the synthesized archive is always used to
+    // ensure consistency (currently reports as 5.1.0-0.0).
+    const auto archive = FileSys::SystemArchive::SystemVersion();
+
+    const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
+        LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
+                  desc.c_str());
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(code);
+    };
+
+    if (archive == nullptr) {
+        early_exit_failure("The system version archive couldn't be synthesized.",
+                           FileSys::ERROR_FAILED_MOUNT_ARCHIVE);
+        return;
+    }
+
+    const auto ver_file = archive->GetFile("file");
+    if (ver_file == nullptr) {
+        early_exit_failure("The system version archive didn't contain the file 'file'.",
+                           FileSys::ERROR_INVALID_ARGUMENT);
+        return;
+    }
+
+    auto data = ver_file->ReadAllBytes();
+    if (data.size() != 0x100) {
+        early_exit_failure("The system version file 'file' was not the correct size.",
+                           FileSys::ERROR_OUT_OF_BOUNDS);
+        return;
+    }
+
+    // If the command is GetFirmwareVersion (as opposed to GetFirmwareVersion2), hardware will
+    // zero out the REVISION_MINOR field.
+    if (type == GetFirmwareVersionType::Version1) {
+        data[SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET] = 0;
+    }
+
+    ctx.WriteBuffer(data);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
+} // Anonymous namespace
+
+void SET_SYS::GetFirmwareVersion(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1);
+}
+
+void SET_SYS::GetFirmwareVersion2(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2);
+}
+
 void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SET, "called");
 
@@ -33,8 +108,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
         {0, nullptr, "SetLanguageCode"},
         {1, nullptr, "SetNetworkSettings"},
         {2, nullptr, "GetNetworkSettings"},
-        {3, nullptr, "GetFirmwareVersion"},
-        {4, nullptr, "GetFirmwareVersion2"},
+        {3, &SET_SYS::GetFirmwareVersion, "GetFirmwareVersion"},
+        {4, &SET_SYS::GetFirmwareVersion2, "GetFirmwareVersion2"},
         {5, nullptr, "GetFirmwareVersionDigest"},
         {7, nullptr, "GetLockScreenFlag"},
         {8, nullptr, "SetLockScreenFlag"},
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index f602f3c77..13ee2cf46 100644
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -20,6 +20,8 @@ private:
         BasicBlack = 1,
     };
 
+    void GetFirmwareVersion(Kernel::HLERequestContext& ctx);
+    void GetFirmwareVersion2(Kernel::HLERequestContext& ctx);
     void GetColorSetId(Kernel::HLERequestContext& ctx);
     void SetColorSetId(Kernel::HLERequestContext& ctx);
 
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index 74da4d5e6..e9ee73710 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
     rb.Push(RESULT_SUCCESS);
-    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client};
+    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
     rb.PushMoveObjects(session);
 
     LOG_DEBUG(Service, "session={}", session->GetObjectId());
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index bef25433e..b9d6381b4 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -67,7 +67,7 @@ public:
         if (port == nullptr) {
             return nullptr;
         }
-        return std::static_pointer_cast<T>(port->hle_handler);
+        return std::static_pointer_cast<T>(port->GetHLEHandler());
     }
 
     void InvokeControlRequest(Kernel::HLERequestContext& context);
diff --git a/src/core/hle/service/sockets/sfdnsres.cpp b/src/core/hle/service/sockets/sfdnsres.cpp
index 13ab1d31e..852e71e4b 100644
--- a/src/core/hle/service/sockets/sfdnsres.cpp
+++ b/src/core/hle/service/sockets/sfdnsres.cpp
@@ -8,12 +8,20 @@
 namespace Service::Sockets {
 
 void SFDNSRES::GetAddrInfo(Kernel::HLERequestContext& ctx) {
+    struct Parameters {
+        u8 use_nsd_resolve;
+        u32 unknown;
+        u64 process_id;
+    };
+
     IPC::RequestParser rp{ctx};
+    const auto parameters = rp.PopRaw<Parameters>();
 
-    LOG_WARNING(Service, "(STUBBED) called");
+    LOG_WARNING(Service,
+                "(STUBBED) called. use_nsd_resolve={}, unknown=0x{:08X}, process_id=0x{:016X}",
+                parameters.use_nsd_resolve, parameters.unknown, parameters.process_id);
 
     IPC::ResponseBuilder rb{ctx, 2};
-
     rb.Push(RESULT_SUCCESS);
 }
 
diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp
index 8db0c2f13..e724d4ab8 100644
--- a/src/core/hle/service/spl/module.cpp
+++ b/src/core/hle/service/spl/module.cpp
@@ -26,9 +26,7 @@ Module::Interface::~Interface() = default;
 void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SPL, "called");
 
-    IPC::RequestParser rp{ctx};
-
-    std::size_t size = ctx.GetWriteBufferSize();
+    const std::size_t size = ctx.GetWriteBufferSize();
 
     std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max());
     std::vector<u8> data(size);
diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp
index af40a1815..f7f87a958 100644
--- a/src/core/hle/service/ssl/ssl.cpp
+++ b/src/core/hle/service/ssl/ssl.cpp
@@ -64,13 +64,19 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~ISslContext() = default;
 
 private:
     void SetOption(Kernel::HLERequestContext& ctx) {
-        LOG_WARNING(Service_SSL, "(STUBBED) called");
+        struct Parameters {
+            u8 enable;
+            u32 option;
+        };
 
         IPC::RequestParser rp{ctx};
+        const auto parameters = rp.PopRaw<Parameters>();
+
+        LOG_WARNING(Service_SSL, "(STUBBED) called. enable={}, option={}", parameters.enable,
+                    parameters.option);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index c13640ad8..aa115935d 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -5,6 +5,7 @@
 #include <chrono>
 #include <ctime>
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
     void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Time, "called");
 
-        SteadyClockTimePoint steady_clock_time_point{
-            CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000};
+        const auto& core_timing = Core::System::GetInstance().CoreTiming();
+        const SteadyClockTimePoint steady_clock_time_point{
+            Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
         IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
         rb.Push(RESULT_SUCCESS);
         rb.PushRaw(steady_clock_time_point);
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
         return;
     }
 
+    const auto& core_timing = Core::System::GetInstance().CoreTiming();
     const SteadyClockTimePoint steady_clock_time_point{
-        CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}};
+        Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};
 
     CalendarTime calendar_time{};
     calendar_time.year = tm->tm_year + 1900;
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
new file mode 100644
index 000000000..01d80311b
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -0,0 +1,71 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "core/hle/kernel/readable_event.h"
+#include "core/hle/service/vi/display/vi_display.h"
+#include "core/hle/service/vi/layer/vi_layer.h"
+
+namespace Service::VI {
+
+Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
+    auto& kernel = Core::System::GetInstance().Kernel();
+    vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
+                                                         fmt::format("Display VSync Event {}", id));
+}
+
+Display::~Display() = default;
+
+Layer& Display::GetLayer(std::size_t index) {
+    return layers.at(index);
+}
+
+const Layer& Display::GetLayer(std::size_t index) const {
+    return layers.at(index);
+}
+
+Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
+    return vsync_event.readable;
+}
+
+void Display::SignalVSyncEvent() {
+    vsync_event.writable->Signal();
+}
+
+void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
+    // TODO(Subv): Support more than 1 layer.
+    ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
+
+    layers.emplace_back(id, buffer_queue);
+}
+
+Layer* Display::FindLayer(u64 id) {
+    const auto itr = std::find_if(layers.begin(), layers.end(),
+                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+
+    if (itr == layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
+}
+
+const Layer* Display::FindLayer(u64 id) const {
+    const auto itr = std::find_if(layers.begin(), layers.end(),
+                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+
+    if (itr == layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
+}
+
+} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
new file mode 100644
index 000000000..2acd46ff8
--- /dev/null
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -0,0 +1,98 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "common/common_types.h"
+#include "core/hle/kernel/writable_event.h"
+
+namespace Service::NVFlinger {
+class BufferQueue;
+}
+
+namespace Service::VI {
+
+class Layer;
+
+/// Represents a single display type
+class Display {
+public:
+    /// Constructs a display with a given unique ID and name.
+    ///
+    /// @param id   The unique ID for this display.
+    /// @param name The name for this display.
+    ///
+    Display(u64 id, std::string name);
+    ~Display();
+
+    Display(const Display&) = delete;
+    Display& operator=(const Display&) = delete;
+
+    Display(Display&&) = default;
+    Display& operator=(Display&&) = default;
+
+    /// Gets the unique ID assigned to this display.
+    u64 GetID() const {
+        return id;
+    }
+
+    /// Gets the name of this display
+    const std::string& GetName() const {
+        return name;
+    }
+
+    /// Whether or not this display has any layers added to it.
+    bool HasLayers() const {
+        return !layers.empty();
+    }
+
+    /// Gets a layer for this display based off an index.
+    Layer& GetLayer(std::size_t index);
+
+    /// Gets a layer for this display based off an index.
+    const Layer& GetLayer(std::size_t index) const;
+
+    /// Gets the readable vsync event.
+    Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
+
+    /// Signals the internal vsync event.
+    void SignalVSyncEvent();
+
+    /// Creates and adds a layer to this display with the given ID.
+    ///
+    /// @param id           The ID to assign to the created layer.
+    /// @param buffer_queue The buffer queue for the layer instance to use.
+    ///
+    void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
+
+    /// Attempts to find a layer with the given ID.
+    ///
+    /// @param id The layer ID.
+    ///
+    /// @returns If found, the Layer instance with the given ID.
+    ///          If not found, then nullptr is returned.
+    ///
+    Layer* FindLayer(u64 id);
+
+    /// Attempts to find a layer with the given ID.
+    ///
+    /// @param id The layer ID.
+    ///
+    /// @returns If found, the Layer instance with the given ID.
+    ///          If not found, then nullptr is returned.
+    ///
+    const Layer* FindLayer(u64 id) const;
+
+private:
+    u64 id;
+    std::string name;
+
+    std::vector<Layer> layers;
+    Kernel::EventPair vsync_event;
+};
+
+} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp
new file mode 100644
index 000000000..954225c26
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -0,0 +1,13 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/service/vi/layer/vi_layer.h"
+
+namespace Service::VI {
+
+Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
+
+Layer::~Layer() = default;
+
+} // namespace Service::VI
diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h
new file mode 100644
index 000000000..c6bfd01f6
--- /dev/null
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Service::NVFlinger {
+class BufferQueue;
+}
+
+namespace Service::VI {
+
+/// Represents a single display layer.
+class Layer {
+public:
+    /// Constructs a layer with a given ID and buffer queue.
+    ///
+    /// @param id    The ID to assign to this layer.
+    /// @param queue The buffer queue for this layer to use.
+    ///
+    Layer(u64 id, NVFlinger::BufferQueue& queue);
+    ~Layer();
+
+    Layer(const Layer&) = delete;
+    Layer& operator=(const Layer&) = delete;
+
+    Layer(Layer&&) = default;
+    Layer& operator=(Layer&&) = delete;
+
+    /// Gets the ID for this layer.
+    u64 GetID() const {
+        return id;
+    }
+
+    /// Gets a reference to the buffer queue this layer is using.
+    NVFlinger::BufferQueue& GetBufferQueue() {
+        return buffer_queue;
+    }
+
+    /// Gets a const reference to the buffer queue this layer is using.
+    const NVFlinger::BufferQueue& GetBufferQueue() const {
+        return buffer_queue;
+    }
+
+private:
+    u64 id;
+    NVFlinger::BufferQueue& buffer_queue;
+};
+
+} // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 70c933934..4e17249a9 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
+#include "core/hle/service/service.h"
 #include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_m.h"
 #include "core/hle/service/vi/vi_s.h"
@@ -33,7 +34,9 @@
 namespace Service::VI {
 
 constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
+constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
 constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
+constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
 
 struct DisplayInfo {
     /// The name of this particular display.
@@ -419,7 +422,7 @@ public:
         u32_le fence_is_valid;
         std::array<Fence, 2> fences;
 
-        MathUtil::Rectangle<int> GetCropRect() const {
+        Common::Rectangle<int> GetCropRect() const {
             return {crop_left, crop_top, crop_right, crop_bottom};
         }
     };
@@ -495,7 +498,6 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~IHOSBinderDriver() = default;
 
 private:
     enum class TransactionId {
@@ -524,7 +526,7 @@ private:
         LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
                   static_cast<u32>(transaction), flags);
 
-        auto buffer_queue = nv_flinger->GetBufferQueue(id);
+        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
 
         if (transaction == TransactionId::Connect) {
             IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -537,7 +539,7 @@ private:
         } else if (transaction == TransactionId::SetPreallocatedBuffer) {
             IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
 
-            buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
+            buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
 
             IGBPSetPreallocatedBufferResponseParcel response{};
             ctx.WriteBuffer(response.Serialize());
@@ -545,7 +547,7 @@ private:
             IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
             const u32 width{request.data.width};
             const u32 height{request.data.height};
-            std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
+            std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
 
             if (slot) {
                 // Buffer is available
@@ -558,8 +560,8 @@ private:
                     [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                         Kernel::ThreadWakeupReason reason) {
                         // Repeat TransactParcel DequeueBuffer when a buffer is available
-                        auto buffer_queue = nv_flinger->GetBufferQueue(id);
-                        std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
+                        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
+                        std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
                         ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
 
                         IGBPDequeueBufferResponseParcel response{*slot};
@@ -567,28 +569,28 @@ private:
                         IPC::ResponseBuilder rb{ctx, 2};
                         rb.Push(RESULT_SUCCESS);
                     },
-                    buffer_queue->GetWritableBufferWaitEvent());
+                    buffer_queue.GetWritableBufferWaitEvent());
             }
         } else if (transaction == TransactionId::RequestBuffer) {
             IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
 
-            auto& buffer = buffer_queue->RequestBuffer(request.slot);
+            auto& buffer = buffer_queue.RequestBuffer(request.slot);
 
             IGBPRequestBufferResponseParcel response{buffer};
             ctx.WriteBuffer(response.Serialize());
         } else if (transaction == TransactionId::QueueBuffer) {
             IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
 
-            buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
-                                      request.data.GetCropRect());
+            buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
+                                     request.data.GetCropRect());
 
             IGBPQueueBufferResponseParcel response{1280, 720};
             ctx.WriteBuffer(response.Serialize());
         } else if (transaction == TransactionId::Query) {
             IGBPQueryRequestParcel request{ctx.ReadBuffer()};
 
-            u32 value =
-                buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
+            const u32 value =
+                buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
 
             IGBPQueryResponseParcel response{value};
             ctx.WriteBuffer(response.Serialize());
@@ -628,12 +630,12 @@ private:
 
         LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
 
-        const auto buffer_queue = nv_flinger->GetBufferQueue(id);
+        const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
 
         // TODO(Subv): Find out what this actually is.
         IPC::ResponseBuilder rb{ctx, 2, 1};
         rb.Push(RESULT_SUCCESS);
-        rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
+        rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
     }
 
     std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -689,7 +691,6 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~ISystemDisplayService() = default;
 
 private:
     void SetLayerZ(Kernel::HLERequestContext& ctx) {
@@ -704,13 +705,14 @@ private:
         rb.Push(RESULT_SUCCESS);
     }
 
+    // In the vi library itself this function currently does nothing except return a success
+    // result code, so do the same thing here, but also log the passed-in values.
     void SetLayerVisibility(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
         const u64 layer_id = rp.Pop<u64>();
         const bool visibility = rp.Pop<bool>();
 
-        LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id,
-                    visibility);
+        LOG_DEBUG(Service_VI, "called, layer_id=0x{:08X}, visibility={}", layer_id, visibility);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
@@ -750,6 +752,7 @@ public:
             {1102, nullptr, "GetDisplayResolution"},
             {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
             {2011, nullptr, "DestroyManagedLayer"},
+            {2012, nullptr, "CreateStrayLayer"},
             {2050, nullptr, "CreateIndirectLayer"},
             {2051, nullptr, "DestroyIndirectLayer"},
             {2052, nullptr, "CreateIndirectProducerEndPoint"},
@@ -813,7 +816,6 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~IManagerDisplayService() = default;
 
 private:
     void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -837,11 +839,16 @@ private:
                     "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
                     unknown, display, aruid);
 
-        const u64 layer_id = nv_flinger->CreateLayer(display);
+        const auto layer_id = nv_flinger->CreateLayer(display);
+        if (!layer_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
 
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
-        rb.Push(layer_id);
+        rb.Push(*layer_id);
     }
 
     void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -874,7 +881,6 @@ private:
 class IApplicationDisplayService final : public ServiceFramework<IApplicationDisplayService> {
 public:
     explicit IApplicationDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
-    ~IApplicationDisplayService() = default;
 
 private:
     enum class ConvertedScaleMode : u64 {
@@ -949,9 +955,16 @@ private:
 
         ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");
 
+        const auto display_id = nv_flinger->OpenDisplay(name);
+        if (!display_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
+
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u64>(nv_flinger->OpenDisplay(name));
+        rb.Push<u64>(*display_id);
     }
 
     void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1020,7 +1033,6 @@ private:
     void ListDisplays(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_VI, "(STUBBED) called");
 
-        IPC::RequestParser rp{ctx};
         DisplayInfo display_info;
         display_info.width *= static_cast<u64>(Settings::values.resolution_factor);
         display_info.height *= static_cast<u64>(Settings::values.resolution_factor);
@@ -1042,10 +1054,21 @@ private:
 
         LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);
 
-        const u64 display_id = nv_flinger->OpenDisplay(display_name);
-        const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);
+        const auto display_id = nv_flinger->OpenDisplay(display_name);
+        if (!display_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
+
+        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
+        if (!buffer_queue_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
 
-        NativeWindow native_window{buffer_queue_id};
+        NativeWindow native_window{*buffer_queue_id};
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1061,13 +1084,24 @@ private:
 
         // TODO(Subv): What's the difference between a Stray and a Managed layer?
 
-        const u64 layer_id = nv_flinger->CreateLayer(display_id);
-        const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);
+        const auto layer_id = nv_flinger->CreateLayer(display_id);
+        if (!layer_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
+
+        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
+        if (!buffer_queue_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
 
-        NativeWindow native_window{buffer_queue_id};
+        NativeWindow native_window{*buffer_queue_id};
         IPC::ResponseBuilder rb{ctx, 6};
         rb.Push(RESULT_SUCCESS);
-        rb.Push(layer_id);
+        rb.Push(*layer_id);
         rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
     }
 
@@ -1087,7 +1121,12 @@ private:
 
         LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);
 
-        const auto vsync_event = nv_flinger->GetVsyncEvent(display_id);
+        const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
+        if (!vsync_event) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
 
         IPC::ResponseBuilder rb{ctx, 2, 1};
         rb.Push(RESULT_SUCCESS);
@@ -1161,26 +1200,40 @@ IApplicationDisplayService::IApplicationDisplayService(
     RegisterHandlers(functions);
 }
 
-Module::Interface::Interface(std::shared_ptr<Module> module, const char* name,
-                             std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {}
+/// Returns whether a service with `permission` may request `policy`: User services may only
+/// request Policy::User; System and Manager services may request User or Compositor.
+static bool IsValidServiceAccess(Permission permission, Policy policy) {
+    const bool is_user_policy = policy == Policy::User;
+    const bool is_privileged =
+        permission == Permission::System || permission == Permission::Manager;
+    if (permission == Permission::User) {
+        return is_user_policy;
+    }
+    return is_privileged && (is_user_policy || policy == Policy::Compositor);
+}
 
-Module::Interface::~Interface() = default;
+void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
+                                   std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
+                                   Permission permission) {
+    IPC::RequestParser rp{ctx};
+    const auto policy = rp.PopEnum<Policy>();
 
-void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_VI, "(STUBBED) called");
+    if (!IsValidServiceAccess(permission, policy)) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ERR_PERMISSION_DENIED);
+        return;
+    }
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
     rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger);
+    rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
 }
 
 void InstallInterfaces(SM::ServiceManager& service_manager,
                        std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
-    auto module = std::make_shared<Module>();
-    std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager);
-    std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager);
-    std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
+    std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
+    std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
+    std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
 }
 
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index e3963502a..6b66f8b81 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@
 
 #pragma once
 
-#include "core/hle/service/service.h"
+#include <memory>
+#include "common/common_types.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
 
 namespace Service::NVFlinger {
 class NVFlinger;
 }
 
+namespace Service::SM {
+class ServiceManager;
+}
+
 namespace Service::VI {
 
 enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
     UndockedHeight = 720,
 };
 
-class Module final {
-public:
-    class Interface : public ServiceFramework<Interface> {
-    public:
-        explicit Interface(std::shared_ptr<Module> module, const char* name,
-                           std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
-        ~Interface() override;
-
-        void GetDisplayService(Kernel::HLERequestContext& ctx);
+/// Permission level for a particular VI service instance
+enum class Permission {
+    User,    // Passed by vi:u
+    System,  // Passed by vi:s
+    Manager, // Passed by vi:m
+};
 
-    protected:
-        std::shared_ptr<Module> module;
-        std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
-    };
+/// A policy type that may be requested via GetDisplayService and
+/// GetDisplayServiceWithProxyNameExchange
+enum class Policy {
+    User,       // Accepted for every permission level
+    Compositor, // Accepted only for Permission::System and Permission::Manager
 };
 
+namespace detail {
+void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
+                           std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
+} // namespace detail
+
 /// Registers all VI services with the specified service manager.
 void InstallInterfaces(SM::ServiceManager& service_manager,
                        std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp
index 207c06b16..06070087f 100644
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_m.h"
 
 namespace Service::VI {
 
-VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) {
+VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
+    : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
     static const FunctionInfo functions[] = {
         {2, &VI_M::GetDisplayService, "GetDisplayService"},
         {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
 
 VI_M::~VI_M() = default;
 
+void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_VI, "called");
+    // The shared implementation checks the requested policy against Permission::Manager.
+    detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
+}
+
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h
index 487d58d50..290e06689 100644
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@
 
 #pragma once
 
-#include "core/hle/service/vi/vi.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
+
+namespace Service::NVFlinger {
+class NVFlinger;
+}
 
 namespace Service::VI {
 
-class VI_M final : public Module::Interface {
+class VI_M final : public ServiceFramework<VI_M> {
 public:
-    explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
+    explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
     ~VI_M() override;
+
+private:
+    void GetDisplayService(Kernel::HLERequestContext& ctx);
+
+    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
 };
 
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp
index 920e6a1f6..57c596cc4 100644
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_s.h"
 
 namespace Service::VI {
 
-VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) {
+VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
+    : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
     static const FunctionInfo functions[] = {
         {1, &VI_S::GetDisplayService, "GetDisplayService"},
         {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
 
 VI_S::~VI_S() = default;
 
+void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_VI, "called");
+    // The shared implementation checks the requested policy against Permission::System.
+    detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
+}
+
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h
index bbc31148f..47804dc0b 100644
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@
 
 #pragma once
 
-#include "core/hle/service/vi/vi.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
+
+namespace Service::NVFlinger {
+class NVFlinger;
+}
 
 namespace Service::VI {
 
-class VI_S final : public Module::Interface {
+class VI_S final : public ServiceFramework<VI_S> {
 public:
-    explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
+    explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
     ~VI_S() override;
+
+private:
+    void GetDisplayService(Kernel::HLERequestContext& ctx);
+
+    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
 };
 
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp
index d81e410d6..9d5ceb608 100644
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_u.h"
 
 namespace Service::VI {
 
-VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) {
+VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
+    : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
     static const FunctionInfo functions[] = {
         {0, &VI_U::GetDisplayService, "GetDisplayService"},
     };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
 
 VI_U::~VI_U() = default;
 
+void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_VI, "called");
+    // The shared implementation checks the requested policy against Permission::User.
+    detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
+}
+
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h
index b92f28c92..19bdb73b0 100644
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@
 
 #pragma once
 
-#include "core/hle/service/vi/vi.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
+
+namespace Service::NVFlinger {
+class NVFlinger;
+}
 
 namespace Service::VI {
 
-class VI_U final : public Module::Interface {
+class VI_U final : public ServiceFramework<VI_U> {
 public:
-    explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
+    explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
     ~VI_U() override;
+
+private:
+    void GetDisplayService(Kernel::HLERequestContext& ctx);
+
+    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
 };
 
 } // namespace Service::VI
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6057c7f26..46ac372f6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "common/file_util.h"
 #include "common/logging/log.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/loader/elf.h"
@@ -340,7 +341,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
     }
 
     codeset.entrypoint = base_addr + header->e_entry;
-    codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+    codeset.memory = std::move(program_image);
 
     LOG_DEBUG(Loader, "Done loading.");
 
diff --git a/src/core/loader/linker.cpp b/src/core/loader/linker.cpp
deleted file mode 100644
index 57ca8c3ee..000000000
--- a/src/core/loader/linker.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <vector>
-
-#include "common/common_funcs.h"
-#include "common/logging/log.h"
-#include "common/swap.h"
-#include "core/loader/linker.h"
-#include "core/memory.h"
-
-namespace Loader {
-
-enum class RelocationType : u32 { ABS64 = 257, GLOB_DAT = 1025, JUMP_SLOT = 1026, RELATIVE = 1027 };
-
-enum DynamicType : u32 {
-    DT_NULL = 0,
-    DT_PLTRELSZ = 2,
-    DT_STRTAB = 5,
-    DT_SYMTAB = 6,
-    DT_RELA = 7,
-    DT_RELASZ = 8,
-    DT_STRSZ = 10,
-    DT_JMPREL = 23,
-};
-
-struct Elf64_Rela {
-    u64_le offset;
-    RelocationType type;
-    u32_le symbol;
-    s64_le addend;
-};
-static_assert(sizeof(Elf64_Rela) == 0x18, "Elf64_Rela has incorrect size.");
-
-struct Elf64_Dyn {
-    u64_le tag;
-    u64_le value;
-};
-static_assert(sizeof(Elf64_Dyn) == 0x10, "Elf64_Dyn has incorrect size.");
-
-struct Elf64_Sym {
-    u32_le name;
-    INSERT_PADDING_BYTES(0x2);
-    u16_le shndx;
-    u64_le value;
-    u64_le size;
-};
-static_assert(sizeof(Elf64_Sym) == 0x18, "Elf64_Sym has incorrect size.");
-
-void Linker::WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
-                              u64 relocation_offset, u64 size, VAddr load_base) {
-    for (u64 i = 0; i < size; i += sizeof(Elf64_Rela)) {
-        Elf64_Rela rela;
-        std::memcpy(&rela, &program_image[relocation_offset + i], sizeof(Elf64_Rela));
-
-        const Symbol& symbol = symbols[rela.symbol];
-        switch (rela.type) {
-        case RelocationType::RELATIVE: {
-            const u64 value = load_base + rela.addend;
-            if (!symbol.name.empty()) {
-                exports[symbol.name] = value;
-            }
-            std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
-            break;
-        }
-        case RelocationType::JUMP_SLOT:
-        case RelocationType::GLOB_DAT:
-            if (!symbol.value) {
-                imports[symbol.name] = {rela.offset + load_base, 0};
-            } else {
-                exports[symbol.name] = symbol.value;
-                std::memcpy(&program_image[rela.offset], &symbol.value, sizeof(u64));
-            }
-            break;
-        case RelocationType::ABS64:
-            if (!symbol.value) {
-                imports[symbol.name] = {rela.offset + load_base, rela.addend};
-            } else {
-                const u64 value = symbol.value + rela.addend;
-                exports[symbol.name] = value;
-                std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
-            }
-            break;
-        default:
-            LOG_CRITICAL(Loader, "Unknown relocation type: {}", static_cast<int>(rela.type));
-            break;
-        }
-    }
-}
-
-void Linker::Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base) {
-    std::map<u64, u64> dynamic;
-    while (dynamic_section_offset < program_image.size()) {
-        Elf64_Dyn dyn;
-        std::memcpy(&dyn, &program_image[dynamic_section_offset], sizeof(Elf64_Dyn));
-        dynamic_section_offset += sizeof(Elf64_Dyn);
-
-        if (dyn.tag == DT_NULL) {
-            break;
-        }
-        dynamic[dyn.tag] = dyn.value;
-    }
-
-    u64 offset = dynamic[DT_SYMTAB];
-    std::vector<Symbol> symbols;
-    while (offset < program_image.size()) {
-        Elf64_Sym sym;
-        std::memcpy(&sym, &program_image[offset], sizeof(Elf64_Sym));
-        offset += sizeof(Elf64_Sym);
-
-        if (sym.name >= dynamic[DT_STRSZ]) {
-            break;
-        }
-
-        std::string name = reinterpret_cast<char*>(&program_image[dynamic[DT_STRTAB] + sym.name]);
-        if (sym.value) {
-            exports[name] = load_base + sym.value;
-            symbols.emplace_back(std::move(name), load_base + sym.value);
-        } else {
-            symbols.emplace_back(std::move(name), 0);
-        }
-    }
-
-    if (dynamic.find(DT_RELA) != dynamic.end()) {
-        WriteRelocations(program_image, symbols, dynamic[DT_RELA], dynamic[DT_RELASZ], load_base);
-    }
-
-    if (dynamic.find(DT_JMPREL) != dynamic.end()) {
-        WriteRelocations(program_image, symbols, dynamic[DT_JMPREL], dynamic[DT_PLTRELSZ],
-                         load_base);
-    }
-}
-
-void Linker::ResolveImports() {
-    // Resolve imports
-    for (const auto& import : imports) {
-        const auto& search = exports.find(import.first);
-        if (search != exports.end()) {
-            Memory::Write64(import.second.ea, search->second + import.second.addend);
-        } else {
-            LOG_ERROR(Loader, "Unresolved import: {}", import.first);
-        }
-    }
-}
-
-} // namespace Loader
diff --git a/src/core/loader/linker.h b/src/core/loader/linker.h
deleted file mode 100644
index 107625837..000000000
--- a/src/core/loader/linker.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <map>
-#include <string>
-#include "common/common_types.h"
-
-namespace Loader {
-
-class Linker {
-protected:
-    struct Symbol {
-        Symbol(std::string&& name, u64 value) : name(std::move(name)), value(value) {}
-        std::string name;
-        u64 value;
-    };
-
-    struct Import {
-        VAddr ea;
-        s64 addend;
-    };
-
-    void WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
-                          u64 relocation_offset, u64 size, VAddr load_base);
-    void Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base);
-
-    void ResolveImports();
-
-    std::map<std::string, Import> imports;
-    std::map<std::string, VAddr> exports;
-};
-
-} // namespace Loader
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 4fad0c0dd..31e4a0c84 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -14,6 +14,7 @@
 #include "core/file_sys/romfs_factory.h"
 #include "core/file_sys/vfs_offset.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/hle/service/filesystem/filesystem.h"
@@ -186,7 +187,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
     program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
 
     // Load codeset for current process
-    codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+    codeset.memory = std::move(program_image);
     process.LoadModule(std::move(codeset), load_base);
 
     // Register module with GDBStub
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 013d629c0..85b0ed644 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 #include "common/common_types.h"
-#include "core/loader/linker.h"
 #include "core/loader/loader.h"
 
 namespace FileSys {
@@ -21,7 +21,7 @@ class Process;
 namespace Loader {
 
 /// Loads an NRO file
-class AppLoader_NRO final : public AppLoader, Linker {
+class AppLoader_NRO final : public AppLoader {
 public:
     explicit AppLoader_NRO(FileSys::VirtualFile file);
     ~AppLoader_NRO() override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 6ded0b707..d7c47c197 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -4,50 +4,28 @@
 
 #include <cinttypes>
 #include <vector>
-#include <lz4.h>
+
 #include "common/common_funcs.h"
 #include "common/file_util.h"
+#include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "common/lz4_compression.h"
 #include "common/swap.h"
+#include "core/core.h"
 #include "core/file_sys/patch_manager.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/loader/nso.h"
 #include "core/memory.h"
 #include "core/settings.h"
 
-namespace Loader {
+
 
-struct NsoSegmentHeader {
-    u32_le offset;
-    u32_le location;
-    u32_le size;
-    union {
-        u32_le alignment;
-        u32_le bss_size;
-    };
-};
-static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
-
-struct NsoHeader {
-    u32_le magic;
-    u32_le version;
-    INSERT_PADDING_WORDS(1);
-    u8 flags;
-    std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
-    std::array<u8, 0x20> build_id;
-    std::array<u32_le, 3> segments_compressed_size;
-
-    bool IsSegmentCompressed(size_t segment_num) const {
-        ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
-        return ((flags >> segment_num) & 1);
-    }
-};
-static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
-static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
-
-struct ModHeader {
+namespace Loader {
+namespace {
+struct MODHeader {
     u32_le magic;
     u32_le dynamic_offset;
     u32_le bss_start_offset;
@@ -56,7 +34,28 @@ struct ModHeader {
     u32_le eh_frame_hdr_end_offset;
     u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base
 };
-static_assert(sizeof(ModHeader) == 0x1c, "ModHeader has incorrect size.");
+static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
+
+/// Decompresses a segment via DecompressDataLZ4 and asserts the result is header.size bytes.
+std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
+                                  const NSOSegmentHeader& header) {
+    std::vector<u8> uncompressed_data =
+        Common::Compression::DecompressDataLZ4(compressed_data, header.size);
+
+    ASSERT_MSG(uncompressed_data.size() == static_cast<std::size_t>(header.size),
+               "{} != {}", header.size, uncompressed_data.size());
+    return uncompressed_data;
+}
+
+constexpr u32 PageAlignSize(u32 size) {
+    return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
+}
+} // Anonymous namespace
+
+bool NSOHeader::IsSegmentCompressed(size_t segment_num) const {
+    ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
+    return ((flags >> segment_num) & 1) != 0;
+}
 
 AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}
 
@@ -73,38 +72,22 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::NSO;
 }
 
-static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
-                                         const NsoSegmentHeader& header) {
-    std::vector<u8> uncompressed_data(header.size);
-    const int bytes_uncompressed =
-        LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
-                            reinterpret_cast<char*>(uncompressed_data.data()),
-                            static_cast<int>(compressed_data.size()), header.size);
-
-    ASSERT_MSG(bytes_uncompressed == static_cast<int>(header.size) &&
-                   bytes_uncompressed == static_cast<int>(uncompressed_data.size()),
-               "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
-
-    return uncompressed_data;
-}
-
-static constexpr u32 PageAlignSize(u32 size) {
-    return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
-}
-
 std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
                                                const FileSys::VfsFile& file, VAddr load_base,
                                                bool should_pass_arguments,
                                                std::optional<FileSys::PatchManager> pm) {
-    if (file.GetSize() < sizeof(NsoHeader))
+    if (file.GetSize() < sizeof(NSOHeader)) {
         return {};
+    }
 
-    NsoHeader nso_header{};
-    if (sizeof(NsoHeader) != file.ReadObject(&nso_header))
+    NSOHeader nso_header{};
+    if (sizeof(NSOHeader) != file.ReadObject(&nso_header)) {
         return {};
+    }
 
-    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return {};
+    }
 
     // Build program image
     Kernel::CodeSet codeset;
@@ -140,10 +123,10 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
     std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
 
     // Read MOD header
-    ModHeader mod_header{};
+    MODHeader mod_header{};
     // Default .bss to size in segment header if MOD0 section doesn't exist
     u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
-    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
+    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader));
     const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
     if (has_mod_header) {
         // Resize program image to include .bss section and page align each section
@@ -155,17 +138,29 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
 
     // Apply patches if necessary
     if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
-        std::vector<u8> pi_header(program_image.size() + 0x100);
-        std::memcpy(pi_header.data(), &nso_header, sizeof(NsoHeader));
-        std::memcpy(pi_header.data() + 0x100, program_image.data(), program_image.size());
+        std::vector<u8> pi_header;
+        pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
+                         reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
+        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
+                         program_image.end());
+
+        pi_header = pm->PatchNSO(pi_header, file.GetName());
 
-        pi_header = pm->PatchNSO(pi_header);
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+    }
 
-        std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size());
+    // Apply cheats if they exist and the program has a valid title ID
+    if (pm) {
+        auto& system = Core::System::GetInstance();
+        const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
+        if (!cheats.empty()) {
+            system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
+                                     load_base, load_base + program_image.size());
+        }
     }
 
     // Load codeset for current process
-    codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+    codeset.memory = std::move(program_image);
     process.LoadModule(std::move(codeset), load_base);
 
     // Register module with GDBStub
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 135b6ea5a..4674c3724 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -4,10 +4,12 @@
 
 #pragma once
 
+#include <array>
 #include <optional>
+#include <type_traits>
 #include "common/common_types.h"
+#include "common/swap.h"
 #include "core/file_sys/patch_manager.h"
-#include "core/loader/linker.h"
 #include "core/loader/loader.h"
 
 namespace Kernel {
@@ -16,6 +18,43 @@ class Process;
 
 namespace Loader {
 
+struct NSOSegmentHeader {
+    u32_le offset;
+    u32_le location;
+    u32_le size; // Decompressed size (DecompressSegment asserts the LZ4 output matches it)
+    union {      // Both members share the same 32-bit slot
+        u32_le alignment;
+        u32_le bss_size; // segments[2] value is the default .bss size when no MOD0 header exists
+    };
+};
+static_assert(sizeof(NSOSegmentHeader) == 0x10, "NSOSegmentHeader has incorrect size.");
+
+struct NSOHeader {
+    using SHA256Hash = std::array<u8, 0x20>;
+
+    struct RODataRelativeExtent {
+        u32_le data_offset;
+        u32_le size;
+    };
+
+    u32_le magic; // Expected to be MakeMagic('N', 'S', 'O', '0')
+    u32_le version;
+    u32 reserved;
+    u32_le flags; // Bits 0-2: per-segment "compressed" flags (see IsSegmentCompressed)
+    std::array<NSOSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
+    std::array<u8, 0x20> build_id;
+    std::array<u32_le, 3> segments_compressed_size;
+    std::array<u8, 0x1C> padding;
+    RODataRelativeExtent api_info_extent;
+    RODataRelativeExtent dynstr_extent;
+    RODataRelativeExtent dynsyn_extent; // NOTE(review): probably meant "dynsym" - confirm
+    std::array<SHA256Hash, 3> segment_hashes;
+    /// Returns whether bit `segment_num` of `flags` is set; asserts segment_num < 3.
+    bool IsSegmentCompressed(size_t segment_num) const;
+};
+static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size.");
+static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable.");
+
 constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000;
 
 struct NSOArgumentHeader {
@@ -26,7 +65,7 @@ struct NSOArgumentHeader {
 static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size.");
 
 /// Loads an NSO file
-class AppLoader_NSO final : public AppLoader, Linker {
+class AppLoader_NSO final : public AppLoader {
 public:
     explicit AppLoader_NSO(FileSys::VirtualFile file);
 
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index d6995b61e..436f7387c 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -22,7 +22,7 @@ class AppLoader_NCA;
 class AppLoader_XCI final : public AppLoader {
 public:
     explicit AppLoader_XCI(FileSys::VirtualFile file);
-    ~AppLoader_XCI();
+    ~AppLoader_XCI() override;
 
     /**
      * Returns the type of the file
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..4e0538bc2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/page_table.h"
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
@@ -18,13 +19,14 @@
 #include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
+#include "video_core/gpu.h"
 #include "video_core/renderer_base.h"
 
 namespace Memory {
 
-static PageTable* current_page_table = nullptr;
+static Common::PageTable* current_page_table = nullptr;
 
-void SetCurrentPageTable(PageTable* page_table) {
+void SetCurrentPageTable(Common::PageTable* page_table) {
     current_page_table = page_table;
 
     auto& system = Core::System::GetInstance();
@@ -36,88 +38,76 @@ void SetCurrentPageTable(PageTable* page_table) {
     }
 }
 
-PageTable* GetCurrentPageTable() {
-    return current_page_table;
-}
-
-PageTable::PageTable() = default;
-
-PageTable::PageTable(std::size_t address_space_width_in_bits) {
-    Resize(address_space_width_in_bits);
-}
-
-PageTable::~PageTable() = default;
-
-void PageTable::Resize(std::size_t address_space_width_in_bits) {
-    const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
-
-    pointers.resize(num_page_table_entries);
-    attributes.resize(num_page_table_entries);
-
-    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
-    // vector size is subsequently decreased (via resize), the vector might not automatically
-    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
-    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
-
-    pointers.shrink_to_fit();
-    attributes.shrink_to_fit();
-}
-
-static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
+static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
+                     Common::PageType type) {
     LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
               (base + size) * PAGE_SIZE);
 
-    RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
-                                 FlushMode::FlushAndInvalidate);
+    // Skip the GPU flush while the system is not powered on (e.g. during boot)
+    if (Core::System::GetInstance().IsPoweredOn()) {
+        Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
+                                                                   size * PAGE_SIZE);
+    }
 
     VAddr end = base + size;
-    while (base != end) {
-        ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base * PAGE_SIZE);
+
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
 
-        page_table.attributes[base] = type;
-        page_table.pointers[base] = memory;
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
 
-        base += 1;
-        if (memory != nullptr)
+            base += 1;
             memory += PAGE_SIZE;
+        }
     }
 }
 
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) {
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
 }
 
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) {
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
 
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
-void UnmapRegion(PageTable& page_table, VAddr base, u64 size) {
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
 
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
     page_table.special_regions.erase(interval);
 }
 
-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook) {
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook) {
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.subtract(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
 /**
@@ -166,22 +156,19 @@ T Read(const VAddr vaddr) {
         return value;
     }
 
-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
     switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
         LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
         return 0;
-    case PageType::Memory:
+    case Common::PageType::Memory:
         ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
         break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
-
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
         T value;
-        std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
+        std::memcpy(&value, host_ptr, sizeof(T));
         return value;
     }
     default:
@@ -199,21 +186,19 @@ void Write(const VAddr vaddr, const T data) {
         return;
     }
 
-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
     switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
         LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
                   static_cast<u32>(data), vaddr);
         return;
-    case PageType::Memory:
+    case Common::PageType::Memory:
         ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
         break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
-        std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+        std::memcpy(host_ptr, &data, sizeof(T));
         break;
     }
     default:
@@ -228,10 +213,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
     if (page_pointer)
         return true;
 
-    if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
+    if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
         return true;
 
-    if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special)
+    if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
         return false;
 
     return false;
@@ -251,7 +236,8 @@ u8* GetPointer(const VAddr vaddr) {
         return page_pointer + (vaddr & PAGE_MASK);
     }
 
-    if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
+    if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
+        Common::PageType::RasterizerCachedMemory) {
         return GetPointerFromVMA(vaddr);
     }
 
@@ -285,20 +271,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
 
     u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
     for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
-        PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
 
         if (cached) {
             // Switch page type to cached if now cached
             switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
-            case PageType::Memory:
-                page_type = PageType::RasterizerCachedMemory;
+            case Common::PageType::Memory:
+                page_type = Common::PageType::RasterizerCachedMemory;
                 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
                 break;
-            case PageType::RasterizerCachedMemory:
+            case Common::PageType::RasterizerCachedMemory:
                 // There can be more than one GPU region mapped per CPU region, so it's common that
                 // this area is already marked as cached.
                 break;
@@ -308,23 +294,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
         } else {
             // Switch page type to uncached if now uncached
             switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
-            case PageType::Memory:
+            case Common::PageType::Memory:
                 // There can be more than one GPU region mapped per CPU region, so it's common that
                 // this area is already unmarked as cached.
                 break;
-            case PageType::RasterizerCachedMemory: {
+            case Common::PageType::RasterizerCachedMemory: {
                 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
                 if (pointer == nullptr) {
                     // It's possible that this function has been called while updating the pagetable
                     // after unmapping a VMA. In that case the underlying VMA will no longer exist,
                     // and we should just leave the pagetable entry blank.
-                    page_type = PageType::Unmapped;
+                    page_type = Common::PageType::Unmapped;
                 } else {
-                    page_type = PageType::Memory;
+                    page_type = Common::PageType::Memory;
                     current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
                 }
                 break;
@@ -336,47 +322,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
     }
 }
 
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
-    auto& system_instance = Core::System::GetInstance();
-
-    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
-    // null here
-    if (!system_instance.IsPoweredOn()) {
-        return;
-    }
-
-    const VAddr end = start + size;
-
-    const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
-        if (start >= region_end || end <= region_start) {
-            // No overlap with region
-            return;
-        }
-
-        const VAddr overlap_start = std::max(start, region_start);
-        const VAddr overlap_end = std::min(end, region_end);
-        const VAddr overlap_size = overlap_end - overlap_start;
-
-        auto& rasterizer = system_instance.Renderer().Rasterizer();
-        switch (mode) {
-        case FlushMode::Flush:
-            rasterizer.FlushRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            rasterizer.InvalidateRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
-            break;
-        }
-    };
-
-    const auto& vm_manager = Core::CurrentProcess()->VMManager();
-
-    CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
-    CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
-}
-
 u8 Read8(const VAddr addr) {
     return Read<u8>(addr);
 }
@@ -407,24 +352,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, src_addr, size);
             std::memset(dest_buffer, 0, copy_amount);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(dest_buffer, host_ptr, copy_amount);
             break;
         }
         default:
@@ -471,23 +416,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, dest_addr, size);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(host_ptr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -517,23 +462,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, dest_addr, size);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memset(dest_ptr, 0, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memset(host_ptr, 0, copy_amount);
             break;
         }
         default:
@@ -559,23 +504,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, src_addr, size);
             ZeroBlock(process, dest_addr, copy_amount);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
             const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             WriteBlock(process, dest_addr, src_ptr, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            WriteBlock(process, dest_addr, host_ptr, copy_amount);
             break;
         }
         default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..6845f5fe1 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -6,11 +6,11 @@
 
 #include <cstddef>
 #include <string>
-#include <tuple>
-#include <vector>
-#include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
 
 namespace Kernel {
 class Process;
@@ -26,83 +26,8 @@ constexpr std::size_t PAGE_BITS = 12;
 constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
 
-enum class PageType : u8 {
-    /// Page is unmapped and should cause an access error.
-    Unmapped,
-    /// Page is mapped to regular memory. This is the only type you can get pointers to.
-    Memory,
-    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
-    /// invalidation
-    RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
-};
-
-/**
- * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
- * mimics the way a real CPU page table works.
- */
-struct PageTable {
-    explicit PageTable();
-    explicit PageTable(std::size_t address_space_width_in_bits);
-    ~PageTable();
-
-    /**
-     * Resizes the page table to be able to accomodate enough pages within
-     * a given address space.
-     *
-     * @param address_space_width_in_bits The address size width in bits.
-     */
-    void Resize(std::size_t address_space_width_in_bits);
-
-    /**
-     * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
-     */
-    std::vector<u8*> pointers;
-
-    /**
-     * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
-     * of type `Special`.
-     */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
-
-    /**
-     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
-     * the corresponding entry in `pointers` MUST be set to null.
-     */
-    std::vector<PageType> attributes;
-};
-
 /// Virtual user-space memory regions
 enum : VAddr {
-    /// Read-only page containing kernel and system configuration values.
-    CONFIG_MEMORY_VADDR = 0x1FF80000,
-    CONFIG_MEMORY_SIZE = 0x00001000,
-    CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE,
-
-    /// Usually read-only page containing mostly values read from hardware.
-    SHARED_PAGE_VADDR = 0x1FF81000,
-    SHARED_PAGE_SIZE = 0x00001000,
-    SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
-
     /// TLS (Thread-Local Storage) related.
     TLS_ENTRY_SIZE = 0x200,
 
@@ -115,9 +40,8 @@ enum : VAddr {
     KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
 };
 
-/// Currently active page table
-void SetCurrentPageTable(PageTable* page_table);
-PageTable* GetCurrentPageTable();
+/// Changes the currently active page table.
+void SetCurrentPageTable(Common::PageTable* page_table);
 
 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +85,4 @@ enum class FlushMode {
  */
 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
 
-/**
- * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
- * address region.
- */
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
-
 } // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
 
 namespace Memory {
 
@@ -17,7 +21,7 @@ namespace Memory {
  * @param size The amount of bytes to map. Must be page-aligned.
  * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
  */
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
 
 /**
  * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,35 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
  * @param size The amount of bytes to map. Must be page-aligned.
  * @param mmio_handler The handler that backs the mapping.
  */
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler);
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler);
 
-void UnmapRegion(PageTable& page_table, VAddr base, u64 size);
+/**
+ * Unmaps a region of the emulated process address space.
+ * @param page_table The page table to modify.
+ * @param base The address to start unmapping at. Must be page-aligned.
+ * @param size The amount of bytes to unmap. Must be page-aligned.
+ */
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
 
-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
+/**
+ * Adds a debug hook covering the address range [base, base + size - 1].
+ * @param page_table The page table whose special regions receive the hook.
+ * @param base The first address covered by the hook.
+ * @param size The amount of bytes covered by the hook.
+ * @param hook The hook to add.
+ */
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook);
+
+/**
+ * Removes the given debug hook from the address range [base, base + size - 1].
+ * @param page_table The page table whose special regions hold the hook.
+ * @param base The first address the hook is removed from.
+ * @param size The amount of bytes the hook is removed from.
+ * @param hook The hook to remove.
+ */
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook);
 
 } // namespace Memory
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index c716a462b..4afd6c8a3 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -18,13 +18,13 @@ using std::chrono::microseconds;
 namespace Core {
 
 void PerfStats::BeginSystemFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     frame_begin = Clock::now();
 }
 
 void PerfStats::EndSystemFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     auto frame_end = Clock::now();
     accumulated_frametime += frame_end - frame_begin;
@@ -35,13 +35,13 @@ void PerfStats::EndSystemFrame() {
 }
 
 void PerfStats::EndGameFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     game_frames += 1;
 }
 
 PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     const auto now = Clock::now();
     // Walltime elapsed since stats were reset
@@ -67,7 +67,7 @@ PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us
 }
 
 double PerfStats::GetLastFrameTimeScale() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     constexpr double FRAME_LENGTH = 1.0 / 60;
     return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 26fcd3405..6d32ebea3 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -74,4 +74,39 @@ void Apply() {
     Service::HID::ReloadInputDevices();
 }
 
+/// Writes a single "name: value" entry to the Config log at info level.
+/// File-local helper for LogSettings(), hence internal linkage.
+template <typename T>
+static void LogSetting(const std::string& name, const T& value) {
+    LOG_INFO(Config, "{}: {}", name, value);
+}
+
+/// Logs a header line followed by the current values held in Settings::values, one per line.
+/// An unset RNG seed is logged as 0.
+void LogSettings() {
+    LOG_INFO(Config, "yuzu Configuration:");
+    LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
+    LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
+    LogSetting("System_CurrentUser", Settings::values.current_user);
+    LogSetting("System_LanguageIndex", Settings::values.language_index);
+    LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit);
+    LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
+    LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
+    LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
+    LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
+    LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
+    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
+    LogSetting("Renderer_UseAsynchronousGpuEmulation",
+               Settings::values.use_asynchronous_gpu_emulation);
+    LogSetting("Audio_OutputEngine", Settings::values.sink_id);
+    LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
+    LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
+    LogSetting("DataStorage_UseVirtualSd", Settings::values.use_virtual_sd);
+    LogSetting("DataStorage_NandDir", Settings::values.nand_dir);
+    LogSetting("DataStorage_SdmcDir", Settings::values.sdmc_dir);
+    LogSetting("Debugging_UseGdbstub", Settings::values.use_gdbstub);
+    LogSetting("Debugging_GdbstubPort", Settings::values.gdbstub_port);
+    LogSetting("Debugging_ProgramArgs", Settings::values.program_args);
+}
+
 } // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index 29ce98983..d543eb32f 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -349,7 +349,6 @@ struct TouchscreenInput {
 struct Values {
     // System
     bool use_docked_mode;
-    bool enable_nfc;
     std::optional<u32> rng_seed;
     // Measured in seconds since epoch
     std::optional<std::chrono::seconds> custom_rtc;
@@ -391,7 +390,9 @@ struct Values {
     float resolution_factor;
     bool use_frame_limit;
     u16 frame_limit;
+    bool use_disk_shader_cache;
     bool use_accurate_gpu_emulation;
+    bool use_asynchronous_gpu_emulation;
 
     float bg_red;
     float bg_green;
@@ -425,4 +426,5 @@ struct Values {
 } extern values;
 
 void Apply();
+void LogSettings();
 } // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 09ed74d78..e1db06811 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -158,8 +158,12 @@ TelemetrySession::TelemetrySession() {
     AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
              Settings::values.use_frame_limit);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
+             Settings::values.use_disk_shader_cache);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
              Settings::values.use_accurate_gpu_emulation);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
+             Settings::values.use_asynchronous_gpu_emulation);
     AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
              Settings::values.use_docked_mode);
 }
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 1c7db28c0..5b4e032bd 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -7,15 +7,18 @@ add_library(input_common STATIC
     main.h
     motion_emu.cpp
     motion_emu.h
-
-    $<$<BOOL:${SDL2_FOUND}>:sdl/sdl.cpp sdl/sdl.h>
+    sdl/sdl.cpp
+    sdl/sdl.h
 )
 
-create_target_directory_groups(input_common)
-
-target_link_libraries(input_common PUBLIC core PRIVATE common)
-
 if(SDL2_FOUND)
+    target_sources(input_common PRIVATE
+        sdl/sdl_impl.cpp
+        sdl/sdl_impl.h
+    )
     target_link_libraries(input_common PRIVATE SDL2)
     target_compile_definitions(input_common PRIVATE HAVE_SDL2)
 endif()
+
+create_target_directory_groups(input_common)
+target_link_libraries(input_common PUBLIC core PRIVATE common)
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 525fe6abc..078374be5 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -36,18 +36,18 @@ struct KeyButtonPair {
 class KeyButtonList {
 public:
     void AddKeyButton(int key_code, KeyButton* key_button) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         list.push_back(KeyButtonPair{key_code, key_button});
     }
 
     void RemoveKeyButton(const KeyButton* key_button) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         list.remove_if(
             [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; });
     }
 
     void ChangeKeyStatus(int key_code, bool pressed) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         for (const KeyButtonPair& pair : list) {
             if (pair.key_code == key_code)
                 pair.key_button->status.store(pressed);
@@ -55,7 +55,7 @@ public:
     }
 
     void ChangeAllKeyStatus(bool pressed) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         for (const KeyButtonPair& pair : list) {
             pair.key_button->status.store(pressed);
         }
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 37f572853..8e66c1b15 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -17,10 +17,7 @@ namespace InputCommon {
 
 static std::shared_ptr<Keyboard> keyboard;
 static std::shared_ptr<MotionEmu> motion_emu;
-
-#ifdef HAVE_SDL2
-static std::thread poll_thread;
-#endif
+static std::unique_ptr<SDL::State> sdl;
 
 void Init() {
     keyboard = std::make_shared<Keyboard>();
@@ -30,15 +27,7 @@ void Init() {
     motion_emu = std::make_shared<MotionEmu>();
     Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu);
 
-#ifdef HAVE_SDL2
-    SDL::Init();
-#endif
-}
-
-void StartJoystickEventHandler() {
-#ifdef HAVE_SDL2
-    poll_thread = std::thread(SDL::PollLoop);
-#endif
+    sdl = SDL::Init();
 }
 
 void Shutdown() {
@@ -47,11 +36,7 @@ void Shutdown() {
     Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button");
     Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
     motion_emu.reset();
-
-#ifdef HAVE_SDL2
-    SDL::Shutdown();
-    poll_thread.join();
-#endif
+    sdl.reset();
 }
 
 Keyboard* GetKeyboard() {
@@ -88,7 +73,7 @@ namespace Polling {
 
 std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) {
 #ifdef HAVE_SDL2
-    return SDL::Polling::GetPollers(type);
+    return sdl->GetPollers(type);
 #else
     return {};
 #endif
diff --git a/src/input_common/main.h b/src/input_common/main.h
index 9eb13106e..77a0ce90b 100644
--- a/src/input_common/main.h
+++ b/src/input_common/main.h
@@ -20,8 +20,6 @@ void Init();
 /// Deregisters all built-in input device factories and shuts them down.
 void Shutdown();
 
-void StartJoystickEventHandler();
-
 class Keyboard;
 
 /// Gets the keyboard button device factory.
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..868251628 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,32 +32,32 @@ public:
     }
 
     void BeginTilt(int x, int y) {
-        mouse_origin = Math::MakeVec(x, y);
+        mouse_origin = Common::MakeVec(x, y);
         is_tilting = true;
     }
 
     void Tilt(int x, int y) {
-        auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
+        auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
         if (is_tilting) {
-            std::lock_guard<std::mutex> guard(tilt_mutex);
+            std::lock_guard guard{tilt_mutex};
             if (mouse_move.x == 0 && mouse_move.y == 0) {
                 tilt_angle = 0;
             } else {
                 tilt_direction = mouse_move.Cast<float>();
                 tilt_angle =
-                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
+                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
             }
         }
     }
 
     void EndTilt() {
-        std::lock_guard<std::mutex> guard(tilt_mutex);
+        std::lock_guard guard{tilt_mutex};
         tilt_angle = 0;
         is_tilting = false;
     }
 
-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
-        std::lock_guard<std::mutex> guard(status_mutex);
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
+        std::lock_guard guard{status_mutex};
         return status;
     }
 
@@ -66,17 +66,17 @@ private:
     const std::chrono::steady_clock::duration update_duration;
     const float sensitivity;
 
-    Math::Vec2<int> mouse_origin;
+    Common::Vec2<int> mouse_origin;
 
     std::mutex tilt_mutex;
-    Math::Vec2<float> tilt_direction;
+    Common::Vec2<float> tilt_direction;
     float tilt_angle = 0;
 
     bool is_tilting = false;
 
     Common::Event shutdown_event;
 
-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
     std::mutex status_mutex;
 
     // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,29 +85,29 @@ private:
 
     void MotionEmuThread() {
         auto update_time = std::chrono::steady_clock::now();
-        Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
-        Math::Quaternion<float> old_q;
+        Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
+        Common::Quaternion<float> old_q;
 
         while (!shutdown_event.WaitUntil(update_time)) {
             update_time += update_duration;
             old_q = q;
 
             {
-                std::lock_guard<std::mutex> guard(tilt_mutex);
+                std::lock_guard guard{tilt_mutex};
 
                 // Find the quaternion describing current 3DS tilting
-                q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
-                                   tilt_angle);
+                q = Common::MakeQuaternion(
+                    Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
             }
 
             auto inv_q = q.Inverse();
 
             // Set the gravity vector in world space
-            auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
+            auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
 
             // Find the angular rate vector in world space
             auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
-            angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
+            angular_rate *= 1000 / update_millisecond / Common::PI * 180;
 
             // Transform the two vectors from world space to 3DS space
             gravity = QuaternionRotate(inv_q, gravity);
@@ -115,7 +115,7 @@ private:
 
             // Update the sensor state
             {
-                std::lock_guard<std::mutex> guard(status_mutex);
+                std::lock_guard guard{status_mutex};
                 status = std::make_tuple(gravity, angular_rate);
             }
         }
@@ -131,7 +131,7 @@ public:
         device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
     }
 
-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
         return device->GetStatus();
     }
 
diff --git a/src/input_common/sdl/sdl.cpp b/src/input_common/sdl/sdl.cpp
index faf3c1fa3..644db3448 100644
--- a/src/input_common/sdl/sdl.cpp
+++ b/src/input_common/sdl/sdl.cpp
@@ -1,631 +1,19 @@
-// Copyright 2017 Citra Emulator Project
+// Copyright 2018 Citra Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <algorithm>
-#include <atomic>
-#include <cmath>
-#include <functional>
-#include <iterator>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <tuple>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-#include <SDL.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/math_util.h"
-#include "common/param_package.h"
-#include "common/threadsafe_queue.h"
-#include "input_common/main.h"
 #include "input_common/sdl/sdl.h"
+#ifdef HAVE_SDL2
+#include "input_common/sdl/sdl_impl.h"
+#endif
 
-namespace InputCommon {
+namespace InputCommon::SDL {
 
-namespace SDL {
-
-class SDLJoystick;
-class SDLButtonFactory;
-class SDLAnalogFactory;
-
-/// Map of GUID of a list of corresponding virtual Joysticks
-static std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
-static std::mutex joystick_map_mutex;
-
-static std::shared_ptr<SDLButtonFactory> button_factory;
-static std::shared_ptr<SDLAnalogFactory> analog_factory;
-
-/// Used by the Pollers during config
-static std::atomic<bool> polling;
-static Common::SPSCQueue<SDL_Event> event_queue;
-
-static std::atomic<bool> initialized = false;
-
-static std::string GetGUID(SDL_Joystick* joystick) {
-    SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
-    char guid_str[33];
-    SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
-    return guid_str;
-}
-
-class SDLJoystick {
-public:
-    SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
-                decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
-        : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
-
-    void SetButton(int button, bool value) {
-        std::lock_guard<std::mutex> lock(mutex);
-        state.buttons[button] = value;
-    }
-
-    bool GetButton(int button) const {
-        std::lock_guard<std::mutex> lock(mutex);
-        return state.buttons.at(button);
-    }
-
-    void SetAxis(int axis, Sint16 value) {
-        std::lock_guard<std::mutex> lock(mutex);
-        state.axes[axis] = value;
-    }
-
-    float GetAxis(int axis) const {
-        std::lock_guard<std::mutex> lock(mutex);
-        return state.axes.at(axis) / 32767.0f;
-    }
-
-    std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
-        float x = GetAxis(axis_x);
-        float y = GetAxis(axis_y);
-        y = -y; // 3DS uses an y-axis inverse from SDL
-
-        // Make sure the coordinates are in the unit circle,
-        // otherwise normalize it.
-        float r = x * x + y * y;
-        if (r > 1.0f) {
-            r = std::sqrt(r);
-            x /= r;
-            y /= r;
-        }
-
-        return std::make_tuple(x, y);
-    }
-
-    void SetHat(int hat, Uint8 direction) {
-        std::lock_guard<std::mutex> lock(mutex);
-        state.hats[hat] = direction;
-    }
-
-    bool GetHatDirection(int hat, Uint8 direction) const {
-        std::lock_guard<std::mutex> lock(mutex);
-        return (state.hats.at(hat) & direction) != 0;
-    }
-    /**
-     * The guid of the joystick
-     */
-    const std::string& GetGUID() const {
-        return guid;
-    }
-
-    /**
-     * The number of joystick from the same type that were connected before this joystick
-     */
-    int GetPort() const {
-        return port;
-    }
-
-    SDL_Joystick* GetSDLJoystick() const {
-        return sdl_joystick.get();
-    }
-
-    void SetSDLJoystick(SDL_Joystick* joystick,
-                        decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
-        sdl_joystick =
-            std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
-    }
-
-private:
-    struct State {
-        std::unordered_map<int, bool> buttons;
-        std::unordered_map<int, Sint16> axes;
-        std::unordered_map<int, Uint8> hats;
-    } state;
-    std::string guid;
-    int port;
-    std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
-    mutable std::mutex mutex;
-};
-
-/**
- * Get the nth joystick with the corresponding GUID
- */
-static std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    const auto it = joystick_map.find(guid);
-    if (it != joystick_map.end()) {
-        while (it->second.size() <= port) {
-            auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
-                                                          [](SDL_Joystick*) {});
-            it->second.emplace_back(std::move(joystick));
-        }
-        return it->second[port];
-    }
-    auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
-    return joystick_map[guid].emplace_back(std::move(joystick));
-}
-
-/**
- * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
- * it to a SDLJoystick with the same guid and that port
- */
-static std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
-    const std::string guid = GetGUID(sdl_joystick);
-    auto map_it = joystick_map.find(guid);
-    if (map_it != joystick_map.end()) {
-        auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
-                                   [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
-                                       return sdl_joystick == joystick->GetSDLJoystick();
-                                   });
-        if (vec_it != map_it->second.end()) {
-            // This is the common case: There is already an existing SDL_Joystick maped to a
-            // SDLJoystick. return the SDLJoystick
-            return *vec_it;
-        }
-        // Search for a SDLJoystick without a mapped SDL_Joystick...
-        auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
-                                       [](const std::shared_ptr<SDLJoystick>& joystick) {
-                                           return !joystick->GetSDLJoystick();
-                                       });
-        if (nullptr_it != map_it->second.end()) {
-            // ... and map it
-            (*nullptr_it)->SetSDLJoystick(sdl_joystick);
-            return *nullptr_it;
-        }
-        // There is no SDLJoystick without a mapped SDL_Joystick
-        // Create a new SDLJoystick
-        auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
-        return map_it->second.emplace_back(std::move(joystick));
-    }
-    auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
-    return joystick_map[guid].emplace_back(std::move(joystick));
-}
-
-void InitJoystick(int joystick_index) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
-    if (!sdl_joystick) {
-        LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
-        return;
-    }
-    std::string guid = GetGUID(sdl_joystick);
-    if (joystick_map.find(guid) == joystick_map.end()) {
-        auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
-        joystick_map[guid].emplace_back(std::move(joystick));
-        return;
-    }
-    auto& joystick_guid_list = joystick_map[guid];
-    const auto it = std::find_if(
-        joystick_guid_list.begin(), joystick_guid_list.end(),
-        [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
-    if (it != joystick_guid_list.end()) {
-        (*it)->SetSDLJoystick(sdl_joystick);
-        return;
-    }
-    auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
-    joystick_guid_list.emplace_back(std::move(joystick));
-}
-
-void CloseJoystick(SDL_Joystick* sdl_joystick) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    std::string guid = GetGUID(sdl_joystick);
-    // This call to guid is save since the joystick is guranteed to be in that map
-    auto& joystick_guid_list = joystick_map[guid];
-    const auto joystick_it =
-        std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
-                     [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
-                         return joystick->GetSDLJoystick() == sdl_joystick;
-                     });
-    (*joystick_it)->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
-}
-
-void HandleGameControllerEvent(const SDL_Event& event) {
-    switch (event.type) {
-    case SDL_JOYBUTTONUP: {
-        auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
-        if (joystick) {
-            joystick->SetButton(event.jbutton.button, false);
-        }
-        break;
-    }
-    case SDL_JOYBUTTONDOWN: {
-        auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
-        if (joystick) {
-            joystick->SetButton(event.jbutton.button, true);
-        }
-        break;
-    }
-    case SDL_JOYHATMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
-        if (joystick) {
-            joystick->SetHat(event.jhat.hat, event.jhat.value);
-        }
-        break;
-    }
-    case SDL_JOYAXISMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
-        if (joystick) {
-            joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
-        }
-        break;
-    }
-    case SDL_JOYDEVICEREMOVED:
-        LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
-        CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
-        break;
-    case SDL_JOYDEVICEADDED:
-        LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
-        InitJoystick(event.jdevice.which);
-        break;
-    }
-}
-
-void CloseSDLJoysticks() {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    joystick_map.clear();
-}
-
-void PollLoop() {
-    if (SDL_Init(SDL_INIT_JOYSTICK) < 0) {
-        LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
-        return;
-    }
-
-    SDL_Event event;
-    while (initialized) {
-        // Wait for 10 ms or until an event happens
-        if (SDL_WaitEventTimeout(&event, 10)) {
-            // Don't handle the event if we are configuring
-            if (polling) {
-                event_queue.Push(event);
-            } else {
-                HandleGameControllerEvent(event);
-            }
-        }
-    }
-    CloseSDLJoysticks();
-    SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
-}
-
-class SDLButton final : public Input::ButtonDevice {
-public:
-    explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
-        : joystick(std::move(joystick_)), button(button_) {}
-
-    bool GetStatus() const override {
-        return joystick->GetButton(button);
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int button;
-};
-
-class SDLDirectionButton final : public Input::ButtonDevice {
-public:
-    explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
-        : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
-
-    bool GetStatus() const override {
-        return joystick->GetHatDirection(hat, direction);
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int hat;
-    Uint8 direction;
-};
-
-class SDLAxisButton final : public Input::ButtonDevice {
-public:
-    explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
-                           bool trigger_if_greater_)
-        : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
-          trigger_if_greater(trigger_if_greater_) {}
-
-    bool GetStatus() const override {
-        float axis_value = joystick->GetAxis(axis);
-        if (trigger_if_greater)
-            return axis_value > threshold;
-        return axis_value < threshold;
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int axis;
-    float threshold;
-    bool trigger_if_greater;
-};
-
-class SDLAnalog final : public Input::AnalogDevice {
-public:
-    SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_)
-        : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_) {}
-
-    std::tuple<float, float> GetStatus() const override {
-        return joystick->GetAnalog(axis_x, axis_y);
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int axis_x;
-    int axis_y;
-};
-
-/// A button device factory that creates button devices from SDL joystick
-class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
-public:
-    /**
-     * Creates a button device from a joystick button
-     * @param params contains parameters for creating the device:
-     *     - "guid": the guid of the joystick to bind
-     *     - "port": the nth joystick of the same type to bind
-     *     - "button"(optional): the index of the button to bind
-     *     - "hat"(optional): the index of the hat to bind as direction buttons
-     *     - "axis"(optional): the index of the axis to bind
-     *     - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
-     *         "down", "left" or "right"
-     *     - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
-     *         triggered if the axis value crosses
-     *     - "direction"(only used for axis): "+" means the button is triggered when the axis
-     * value is greater than the threshold; "-" means the button is triggered when the axis
-     * value is smaller than the threshold
-     */
-    std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
-        const std::string guid = params.Get("guid", "0");
-        const int port = params.Get("port", 0);
-
-        auto joystick = GetSDLJoystickByGUID(guid, port);
-
-        if (params.Has("hat")) {
-            const int hat = params.Get("hat", 0);
-            const std::string direction_name = params.Get("direction", "");
-            Uint8 direction;
-            if (direction_name == "up") {
-                direction = SDL_HAT_UP;
-            } else if (direction_name == "down") {
-                direction = SDL_HAT_DOWN;
-            } else if (direction_name == "left") {
-                direction = SDL_HAT_LEFT;
-            } else if (direction_name == "right") {
-                direction = SDL_HAT_RIGHT;
-            } else {
-                direction = 0;
-            }
-            // This is necessary so accessing GetHat with hat won't crash
-            joystick->SetHat(hat, SDL_HAT_CENTERED);
-            return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
-        }
-
-        if (params.Has("axis")) {
-            const int axis = params.Get("axis", 0);
-            const float threshold = params.Get("threshold", 0.5f);
-            const std::string direction_name = params.Get("direction", "");
-            bool trigger_if_greater;
-            if (direction_name == "+") {
-                trigger_if_greater = true;
-            } else if (direction_name == "-") {
-                trigger_if_greater = false;
-            } else {
-                trigger_if_greater = true;
-                LOG_ERROR(Input, "Unknown direction '{}'", direction_name);
-            }
-            // This is necessary so accessing GetAxis with axis won't crash
-            joystick->SetAxis(axis, 0);
-            return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
-        }
-
-        const int button = params.Get("button", 0);
-        // This is necessary so accessing GetButton with button won't crash
-        joystick->SetButton(button, false);
-        return std::make_unique<SDLButton>(joystick, button);
-    }
-};
-
-/// An analog device factory that creates analog devices from SDL joystick
-class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
-public:
-    /**
-     * Creates analog device from joystick axes
-     * @param params contains parameters for creating the device:
-     *     - "guid": the guid of the joystick to bind
-     *     - "port": the nth joystick of the same type
-     *     - "axis_x": the index of the axis to be bind as x-axis
-     *     - "axis_y": the index of the axis to be bind as y-axis
-     */
-    std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
-        const std::string guid = params.Get("guid", "0");
-        const int port = params.Get("port", 0);
-        const int axis_x = params.Get("axis_x", 0);
-        const int axis_y = params.Get("axis_y", 1);
-
-        auto joystick = GetSDLJoystickByGUID(guid, port);
-
-        // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
-        joystick->SetAxis(axis_x, 0);
-        joystick->SetAxis(axis_y, 0);
-        return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y);
-    }
-};
-
-void Init() {
-    using namespace Input;
-    RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>());
-    RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>());
-    polling = false;
-    initialized = true;
-}
-
-void Shutdown() {
-    if (initialized) {
-        using namespace Input;
-        UnregisterFactory<ButtonDevice>("sdl");
-        UnregisterFactory<AnalogDevice>("sdl");
-        initialized = false;
-    }
-}
-
-Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event) {
-    Common::ParamPackage params({{"engine", "sdl"}});
-    switch (event.type) {
-    case SDL_JOYAXISMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
-        params.Set("port", joystick->GetPort());
-        params.Set("guid", joystick->GetGUID());
-        params.Set("axis", event.jaxis.axis);
-        if (event.jaxis.value > 0) {
-            params.Set("direction", "+");
-            params.Set("threshold", "0.5");
-        } else {
-            params.Set("direction", "-");
-            params.Set("threshold", "-0.5");
-        }
-        break;
-    }
-    case SDL_JOYBUTTONUP: {
-        auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
-        params.Set("port", joystick->GetPort());
-        params.Set("guid", joystick->GetGUID());
-        params.Set("button", event.jbutton.button);
-        break;
-    }
-    case SDL_JOYHATMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
-        params.Set("port", joystick->GetPort());
-        params.Set("guid", joystick->GetGUID());
-        params.Set("hat", event.jhat.hat);
-        switch (event.jhat.value) {
-        case SDL_HAT_UP:
-            params.Set("direction", "up");
-            break;
-        case SDL_HAT_DOWN:
-            params.Set("direction", "down");
-            break;
-        case SDL_HAT_LEFT:
-            params.Set("direction", "left");
-            break;
-        case SDL_HAT_RIGHT:
-            params.Set("direction", "right");
-            break;
-        default:
-            return {};
-        }
-        break;
-    }
-    }
-    return params;
-}
-
-namespace Polling {
-
-class SDLPoller : public InputCommon::Polling::DevicePoller {
-public:
-    void Start() override {
-        event_queue.Clear();
-        polling = true;
-    }
-
-    void Stop() override {
-        polling = false;
-    }
-};
-
-class SDLButtonPoller final : public SDLPoller {
-public:
-    Common::ParamPackage GetNextInput() override {
-        SDL_Event event;
-        while (event_queue.Pop(event)) {
-            switch (event.type) {
-            case SDL_JOYAXISMOTION:
-                if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
-                    break;
-                }
-            case SDL_JOYBUTTONUP:
-            case SDL_JOYHATMOTION:
-                return SDLEventToButtonParamPackage(event);
-            }
-        }
-        return {};
-    }
-};
-
-class SDLAnalogPoller final : public SDLPoller {
-public:
-    void Start() override {
-        SDLPoller::Start();
-
-        // Reset stored axes
-        analog_xaxis = -1;
-        analog_yaxis = -1;
-        analog_axes_joystick = -1;
-    }
-
-    Common::ParamPackage GetNextInput() override {
-        SDL_Event event;
-        while (event_queue.Pop(event)) {
-            if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
-                continue;
-            }
-            // An analog device needs two axes, so we need to store the axis for later and wait for
-            // a second SDL event. The axes also must be from the same joystick.
-            int axis = event.jaxis.axis;
-            if (analog_xaxis == -1) {
-                analog_xaxis = axis;
-                analog_axes_joystick = event.jaxis.which;
-            } else if (analog_yaxis == -1 && analog_xaxis != axis &&
-                       analog_axes_joystick == event.jaxis.which) {
-                analog_yaxis = axis;
-            }
-        }
-        Common::ParamPackage params;
-        if (analog_xaxis != -1 && analog_yaxis != -1) {
-            auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
-            params.Set("engine", "sdl");
-            params.Set("port", joystick->GetPort());
-            params.Set("guid", joystick->GetGUID());
-            params.Set("axis_x", analog_xaxis);
-            params.Set("axis_y", analog_yaxis);
-            analog_xaxis = -1;
-            analog_yaxis = -1;
-            analog_axes_joystick = -1;
-            return params;
-        }
-        return params;
-    }
-
-private:
-    int analog_xaxis = -1;
-    int analog_yaxis = -1;
-    SDL_JoystickID analog_axes_joystick = -1;
-};
-
-std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-    InputCommon::Polling::DeviceType type) {
-    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
-    switch (type) {
-    case InputCommon::Polling::DeviceType::Analog:
-        pollers.push_back(std::make_unique<SDLAnalogPoller>());
-        break;
-    case InputCommon::Polling::DeviceType::Button:
-        pollers.push_back(std::make_unique<SDLButtonPoller>());
-        break;
-    }
-    return pollers;
+std::unique_ptr<State> Init() {
+#ifdef HAVE_SDL2
+    return std::make_unique<SDLState>();
+#else
+    return std::make_unique<NullState>();
+#endif
 }
-} // namespace Polling
-} // namespace SDL
-} // namespace InputCommon
+} // namespace InputCommon::SDL
diff --git a/src/input_common/sdl/sdl.h b/src/input_common/sdl/sdl.h
index 0206860d3..d7f24c68a 100644
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -1,4 +1,4 @@
-// Copyright 2017 Citra Emulator Project
+// Copyright 2018 Citra Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
@@ -7,45 +7,38 @@
 #include <memory>
 #include <vector>
 #include "core/frontend/input.h"
+#include "input_common/main.h"
 
 union SDL_Event;
+
 namespace Common {
 class ParamPackage;
-}
-namespace InputCommon {
-namespace Polling {
+} // namespace Common
+
+namespace InputCommon::Polling {
 class DevicePoller;
 enum class DeviceType;
-} // namespace Polling
-} // namespace InputCommon
-
-namespace InputCommon {
-namespace SDL {
-
-/// Initializes and registers SDL device factories
-void Init();
-
-/// Unresisters SDL device factories and shut them down.
-void Shutdown();
+} // namespace InputCommon::Polling
 
-/// Needs to be called before SDL_QuitSubSystem.
-void CloseSDLJoysticks();
+namespace InputCommon::SDL {
 
-/// Handle SDL_Events for joysticks from SDL_PollEvent
-void HandleGameControllerEvent(const SDL_Event& event);
+class State {
+public:
+    using Pollers = std::vector<std::unique_ptr<Polling::DevicePoller>>;
 
-/// A Loop that calls HandleGameControllerEvent until Shutdown is called
-void PollLoop();
+    /// Unregisters SDL device factories and shuts them down.
+    virtual ~State() = default;
 
-/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
-Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event);
+    virtual Pollers GetPollers(Polling::DeviceType type) = 0; // pollers offered for `type`
+};
 
-namespace Polling {
+class NullState : public State { // Backend stub that never provides any poller
+public:
+    Pollers GetPollers(Polling::DeviceType type) override {
+        return {}; // always an empty list, whatever the device type
+    }
+};
 
-/// Get all DevicePoller that use the SDL backend for a specific device type
-std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-    InputCommon::Polling::DeviceType type);
+std::unique_ptr<State> Init();
 
-} // namespace Polling
-} // namespace SDL
-} // namespace InputCommon
+} // namespace InputCommon::SDL
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
new file mode 100644
index 000000000..5949ecbae
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -0,0 +1,671 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <functional>
+#include <iterator>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <SDL.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/param_package.h"
+#include "common/threadsafe_queue.h"
+#include "core/frontend/input.h"
+#include "input_common/sdl/sdl_impl.h"
+
+namespace InputCommon {
+
+namespace SDL {
+
+static std::string GetGUID(SDL_Joystick* joystick) {
+    // Text form of the joystick GUID; SDL writes a NUL-terminated string into the buffer
+    char text[33];
+    SDL_JoystickGetGUIDString(SDL_JoystickGetGUID(joystick), text, sizeof(text));
+    return std::string(text);
+}
+
+/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
+static Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event);
+
+static int SDLEventWatcher(void* userdata, SDL_Event* event) {
+    auto* const owner = static_cast<SDLState*>(userdata);
+    // While a poller is active, events are queued for it instead of updating joystick state
+    if (!owner->polling) {
+        owner->HandleGameControllerEvent(*event);
+        return 0;
+    }
+    owner->event_queue.Push(*event);
+    return 0;
+}
+
+class SDLJoystick { // Mutex-guarded cache of one joystick's button, axis and hat values
+public:
+    SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
+                decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
+        : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
+
+    void SetButton(int button, bool value) {
+        std::lock_guard lock{mutex};
+        state.buttons[button] = value; // operator[] creates the entry on first use
+    }
+
+    bool GetButton(int button) const {
+        std::lock_guard lock{mutex};
+        return state.buttons.at(button); // at() throws std::out_of_range if never set
+    }
+
+    void SetAxis(int axis, Sint16 value) {
+        std::lock_guard lock{mutex};
+        state.axes[axis] = value;
+    }
+
+    float GetAxis(int axis) const {
+        std::lock_guard lock{mutex};
+        return state.axes.at(axis) / 32767.0f; // raw Sint16 scaled to roughly [-1, 1]
+    }
+
+    std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
+        float x = GetAxis(axis_x); // each GetAxis call takes the lock on its own
+        float y = GetAxis(axis_y);
+        y = -y; // 3DS uses an y-axis inverse from SDL
+
+        // Make sure the coordinates are in the unit circle,
+        // otherwise normalize it.
+        float r = x * x + y * y;
+        if (r > 1.0f) {
+            r = std::sqrt(r);
+            x /= r;
+            y /= r;
+        }
+
+        return std::make_tuple(x, y);
+    }
+
+    void SetHat(int hat, Uint8 direction) {
+        std::lock_guard lock{mutex};
+        state.hats[hat] = direction;
+    }
+
+    bool GetHatDirection(int hat, Uint8 direction) const {
+        std::lock_guard lock{mutex};
+        return (state.hats.at(hat) & direction) != 0; // true if any bit of `direction` is set
+    }
+    /**
+     * The GUID string this joystick was created with
+     */
+    const std::string& GetGUID() const {
+        return guid;
+    }
+
+    /**
+     * The number of joysticks of the same type that were connected before this joystick
+     */
+    int GetPort() const {
+        return port;
+    }
+
+    SDL_Joystick* GetSDLJoystick() const {
+        return sdl_joystick.get(); // null while no SDL_Joystick is attached
+    }
+
+    void SetSDLJoystick(SDL_Joystick* joystick,
+                        decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
+        sdl_joystick = // old handle is released through its own deleter; `mutex` is not taken
+            std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
+    }
+
+private:
+    struct State {
+        std::unordered_map<int, bool> buttons;
+        std::unordered_map<int, Sint16> axes;
+        std::unordered_map<int, Uint8> hats;
+    } state;
+    std::string guid;
+    int port;
+    std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
+    mutable std::mutex mutex; // mutable so the const getters can lock it
+};
+
+/**
+ * Get the nth joystick with the corresponding GUID, creating unplugged placeholders as needed
+ */
+std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
+    // A negative port would turn into a huge unsigned index and grow the list without bound
+    const auto index = static_cast<std::size_t>(std::max(port, 0));
+
+    std::lock_guard lock{joystick_map_mutex};
+    auto& joysticks = joystick_map[guid];
+    // Fill every missing port up to the requested one, also for a GUID seen for the first time
+    while (joysticks.size() <= index) {
+        auto joystick = std::make_shared<SDLJoystick>(guid, static_cast<int>(joysticks.size()),
+                                                      nullptr, [](SDL_Joystick*) {});
+        joysticks.emplace_back(std::move(joystick));
+    }
+    return joysticks[index];
+}
+
+/**
+ * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
+ * it to a SDLJoystick with the same guid and that port
+ */
+std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
+    auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id); // NOTE(review): null if id unknown?
+    const std::string guid = GetGUID(sdl_joystick);
+
+    std::lock_guard lock{joystick_map_mutex};
+    auto map_it = joystick_map.find(guid);
+    if (map_it != joystick_map.end()) {
+        auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
+                                   [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
+                                       return sdl_joystick == joystick->GetSDLJoystick();
+                                   });
+        if (vec_it != map_it->second.end()) {
+            // This is the common case: There is already an existing SDL_Joystick mapped to a
+            // SDLJoystick. Return the SDLJoystick
+            return *vec_it;
+        }
+        // Search for a SDLJoystick without a mapped SDL_Joystick...
+        auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
+                                       [](const std::shared_ptr<SDLJoystick>& joystick) {
+                                           return !joystick->GetSDLJoystick();
+                                       });
+        if (nullptr_it != map_it->second.end()) {
+            // ... and map it
+            (*nullptr_it)->SetSDLJoystick(sdl_joystick);
+            return *nullptr_it;
+        }
+        // There is no SDLJoystick without a mapped SDL_Joystick
+        // Create a new SDLJoystick
+        auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
+        return map_it->second.emplace_back(std::move(joystick));
+    }
+    auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick); // first of this GUID
+    return joystick_map[guid].emplace_back(std::move(joystick));
+}
+
+/// Opens the joystick at `joystick_index` and binds it to a virtual joystick with the same GUID:
+/// the first one that has no SDL_Joystick attached, or a newly appended one.
+void SDLState::InitJoystick(int joystick_index) {
+    SDL_Joystick* const opened = SDL_JoystickOpen(joystick_index);
+    if (opened == nullptr) {
+        LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
+        return;
+    }
+    const std::string guid = GetGUID(opened);
+
+    std::lock_guard lock{joystick_map_mutex};
+    // operator[] yields an empty list for a GUID seen for the first time; the search below then
+    // finds nothing and the joystick is appended as port 0.
+    auto& slots = joystick_map[guid];
+    const auto free_slot = std::find_if(slots.begin(), slots.end(), [](const auto& entry) {
+        return !entry->GetSDLJoystick();
+    });
+    if (free_slot == slots.end()) {
+        auto created = std::make_shared<SDLJoystick>(guid, slots.size(), opened);
+        slots.emplace_back(std::move(created));
+        return;
+    }
+    // Reuse the existing virtual joystick instead of creating another one
+    (*free_slot)->SetSDLJoystick(opened);
+}
+
+/// Detaches `sdl_joystick` from the virtual joystick tracking it; does nothing if none does.
+void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
+    const std::string guid = GetGUID(sdl_joystick);
+    std::shared_ptr<SDLJoystick> joystick;
+    {
+        std::lock_guard lock{joystick_map_mutex};
+        auto& list = joystick_map[guid];
+        const auto it = std::find_if(list.begin(), list.end(), [&](const auto& entry) {
+            return entry->GetSDLJoystick() == sdl_joystick;
+        });
+        if (sdl_joystick == nullptr || it == list.end()) {
+            return; // Null or untracked joystick: end() must never be dereferenced
+        }
+        joystick = *it;
+    }
+    // Release outside the lock: SDL can call the event callback, which locks the mutex again
+    joystick->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
+}
+
+void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
+    switch (event.type) { // Mirror joystick events into the cached SDLJoystick state
+    case SDL_JOYBUTTONUP: {
+        if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
+            joystick->SetButton(event.jbutton.button, false);
+        }
+        break;
+    }
+    case SDL_JOYBUTTONDOWN: {
+        if (auto joystick = GetSDLJoystickBySDLID(event.jbutton.which)) {
+            joystick->SetButton(event.jbutton.button, true);
+        }
+        break;
+    }
+    case SDL_JOYHATMOTION: {
+        if (auto joystick = GetSDLJoystickBySDLID(event.jhat.which)) {
+            joystick->SetHat(event.jhat.hat, event.jhat.value);
+        }
+        break;
+    }
+    case SDL_JOYAXISMOTION: {
+        if (auto joystick = GetSDLJoystickBySDLID(event.jaxis.which)) {
+            joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
+        }
+        break;
+    }
+    case SDL_JOYDEVICEREMOVED: // here jdevice.which is used as an instance id
+        LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
+        CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
+        break;
+    case SDL_JOYDEVICEADDED: // here jdevice.which is used as a device index
+        LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
+        InitJoystick(event.jdevice.which);
+        break;
+    } // every other event type is ignored
+}
+
+void SDLState::CloseJoysticks() {
+    std::lock_guard lock{joystick_map_mutex};
+    joystick_map.clear(); // drops this state's references to every tracked joystick
+}
+
+/// Button device that reports the cached state of one joystick button
+class SDLButton final : public Input::ButtonDevice {
+public:
+    explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
+        : source{std::move(joystick_)}, index{button_} {}
+    bool GetStatus() const override {
+        return source->GetButton(index);
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> source;
+    int index;
+};
+
+/// Button device that is pressed while a hat has the given direction bit set
+class SDLDirectionButton final : public Input::ButtonDevice {
+public:
+    explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
+        : source{std::move(joystick_)}, hat_index{hat_}, direction_mask{direction_} {}
+    bool GetStatus() const override {
+        return source->GetHatDirection(hat_index, direction_mask);
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> source;
+    int hat_index;
+    Uint8 direction_mask;
+};
+
+/// Button device driven by a single axis: it reports pressed while the axis value lies beyond
+/// the threshold on the configured side.
+class SDLAxisButton final : public Input::ButtonDevice {
+public:
+    explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
+                           bool trigger_if_greater_)
+        : source{std::move(joystick_)}, axis_index{axis_}, limit{threshold_},
+          above{trigger_if_greater_} {}
+
+    bool GetStatus() const override {
+        const float value = source->GetAxis(axis_index);
+        return above ? value > limit : value < limit;
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> source;
+    int axis_index;
+    float limit;
+    bool above; // true: pressed when value > limit; false: pressed when value < limit
+};
+
+/// Analog device built from two joystick axes, with a radial deadzone
+class SDLAnalog final : public Input::AnalogDevice {
+public:
+    SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, float deadzone_)
+        : pad{std::move(joystick_)}, x_index{axis_x_}, y_index{axis_y_}, deadzone{deadzone_} {}
+    std::tuple<float, float> GetStatus() const override {
+        const auto [x, y] = pad->GetAnalog(x_index, y_index);
+        const float r = std::sqrt((x * x) + (y * y));
+        if (!(r > deadzone)) {
+            return std::make_tuple(0.0f, 0.0f); // inside the deadzone
+        }
+        return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
+                               y / r * (r - deadzone) / (1 - deadzone));
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> pad;
+    const int x_index;
+    const int y_index;
+    const float deadzone;
+};
+
+/// A button device factory that creates button devices from SDL joystick
+class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
+public:
+    explicit SDLButtonFactory(SDLState& state_) : state(state_) {}
+
+    /**
+     * Creates a button device from a joystick button
+     * @param params contains parameters for creating the device:
+     *     - "guid": the guid of the joystick to bind
+     *     - "port": the nth joystick of the same type to bind
+     *     - "button"(optional): the index of the button to bind
+     *     - "hat"(optional): the index of the hat to bind as direction buttons
+     *     - "axis"(optional): the index of the axis to bind
+     *     - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
+     *         "down", "left" or "right"
+     *     - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
+     *         triggered if the axis value crosses
+     *     - "direction"(only used for axis): "+" means the button is triggered when the axis
+     * value is greater than the threshold; "-" means the button is triggered when the axis
+     * value is smaller than the threshold
+     */
+    std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
+        const std::string guid = params.Get("guid", "0");
+        const int port = params.Get("port", 0);
+
+        auto joystick = state.GetSDLJoystickByGUID(guid, port);
+
+        if (params.Has("hat")) { // "hat" takes precedence over "axis" and "button"
+            const int hat = params.Get("hat", 0);
+            const std::string direction_name = params.Get("direction", "");
+            Uint8 direction;
+            if (direction_name == "up") {
+                direction = SDL_HAT_UP;
+            } else if (direction_name == "down") {
+                direction = SDL_HAT_DOWN;
+            } else if (direction_name == "left") {
+                direction = SDL_HAT_LEFT;
+            } else if (direction_name == "right") {
+                direction = SDL_HAT_RIGHT;
+            } else {
+                direction = 0; // unknown name: an empty mask, so the button never reads as pressed
+            }
+            // This is necessary so accessing GetHatDirection with hat won't crash
+            joystick->SetHat(hat, SDL_HAT_CENTERED);
+            return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
+        }
+
+        if (params.Has("axis")) {
+            const int axis = params.Get("axis", 0);
+            const float threshold = params.Get("threshold", 0.5f);
+            const std::string direction_name = params.Get("direction", "");
+            bool trigger_if_greater;
+            if (direction_name == "+") {
+                trigger_if_greater = true;
+            } else if (direction_name == "-") {
+                trigger_if_greater = false;
+            } else {
+                trigger_if_greater = true; // unknown direction falls back to "+" after logging
+                LOG_ERROR(Input, "Unknown direction {}", direction_name);
+            }
+            // This is necessary so accessing GetAxis with axis won't crash
+            joystick->SetAxis(axis, 0);
+            return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
+        }
+
+        const int button = params.Get("button", 0); // neither "hat" nor "axis": plain button
+        // This is necessary so accessing GetButton with button won't crash
+        joystick->SetButton(button, false);
+        return std::make_unique<SDLButton>(joystick, button);
+    }
+
+private:
+    SDLState& state;
+};
+
+/// Factory that builds analog (stick) devices on top of SDL joysticks
+class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
+public:
+    explicit SDLAnalogFactory(SDLState& state_) : state(state_) {}
+    /**
+     * Creates an analog device from two joystick axes
+     * @param params contains parameters for creating the device:
+     *     - "guid": the guid of the joystick to bind (default "0")
+     *     - "port": the nth joystick of the same type (default 0)
+     *     - "axis_x": the index of the axis to bind as x-axis (default 0)
+     *     - "axis_y": the index of the axis to bind as y-axis (default 1)
+     *     - "deadzone": deadzone radius, clamped to [0, 0.99] (default 0)
+     */
+    std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
+        const std::string guid = params.Get("guid", "0");
+        const int port = params.Get("port", 0);
+        const int x_index = params.Get("axis_x", 0);
+        const int y_index = params.Get("axis_y", 1);
+        const float dead_radius = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f);
+
+        // Create the axis entries up front so GetAxis never looks up a missing key
+        auto pad = state.GetSDLJoystickByGUID(guid, port);
+        pad->SetAxis(x_index, 0);
+        pad->SetAxis(y_index, 0);
+        return std::make_unique<SDLAnalog>(pad, x_index, y_index, dead_radius);
+    }
+
+private:
+    SDLState& state;
+};
+
+SDLState::SDLState() {
+    using namespace Input;
+    RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>(*this));
+    RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>(*this));
+
+    // If the frontend is going to manage the event loop, then we don't start one here
+    start_thread = !SDL_WasInit(SDL_INIT_JOYSTICK);
+    if (start_thread && SDL_Init(SDL_INIT_JOYSTICK) < 0) {
+        LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
+        return; // no event watch, no poll thread and no joysticks in this case
+    }
+    if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
+        LOG_ERROR(Input, "Failed to set Hint for background events: {}", SDL_GetError());
+    }
+
+    // From here on SDLEventWatcher is called for SDL events with this object as userdata
+    SDL_AddEventWatch(&SDLEventWatcher, this);
+
+    initialized = true;
+    if (start_thread) {
+        // Pump events ourselves until the destructor clears `initialized`
+        poll_thread = std::thread([this] {
+            using namespace std::chrono_literals;
+            while (initialized) {
+                SDL_PumpEvents();
+                std::this_thread::sleep_for(10ms);
+            }
+        });
+    }
+    // Connection events that happened before the watcher was added are missed, so open every
+    // joystick that is already present right here
+    for (int i = 0; i < SDL_NumJoysticks(); ++i) {
+        InitJoystick(i);
+    }
+}
+
+SDLState::~SDLState() {
+    using namespace Input;
+    UnregisterFactory<ButtonDevice>("sdl");
+    UnregisterFactory<AnalogDevice>("sdl");
+
+    CloseJoysticks();
+    SDL_DelEventWatch(&SDLEventWatcher, this);
+    // The poll thread was never started if SDL_Init failed, so only join when it is joinable
+    initialized = false;
+    if (poll_thread.joinable()) {
+        poll_thread.join();
+        SDL_QuitSubSystem(SDL_INIT_JOYSTICK); // we initialized the subsystem, so we shut it down
+    }
+}
+
+Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) {
+    Common::ParamPackage params({{"engine", "sdl"}}); // every binding names the engine
+
+    switch (event.type) {
+    case SDL_JOYAXISMOTION: {
+        auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which);
+        params.Set("port", joystick->GetPort());
+        params.Set("guid", joystick->GetGUID());
+        params.Set("axis", event.jaxis.axis);
+        if (event.jaxis.value > 0) { // the sign of the deflection picks the trigger direction
+            params.Set("direction", "+");
+            params.Set("threshold", "0.5");
+        } else {
+            params.Set("direction", "-");
+            params.Set("threshold", "-0.5");
+        }
+        break;
+    }
+    case SDL_JOYBUTTONUP: {
+        auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which);
+        params.Set("port", joystick->GetPort());
+        params.Set("guid", joystick->GetGUID());
+        params.Set("button", event.jbutton.button);
+        break;
+    }
+    case SDL_JOYHATMOTION: {
+        auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which);
+        params.Set("port", joystick->GetPort());
+        params.Set("guid", joystick->GetGUID());
+        params.Set("hat", event.jhat.hat);
+        switch (event.jhat.value) {
+        case SDL_HAT_UP:
+            params.Set("direction", "up");
+            break;
+        case SDL_HAT_DOWN:
+            params.Set("direction", "down");
+            break;
+        case SDL_HAT_LEFT:
+            params.Set("direction", "left");
+            break;
+        case SDL_HAT_RIGHT:
+            params.Set("direction", "right");
+            break;
+        default: // any other hat value produces no binding at all
+            return {};
+        }
+        break;
+    }
+    }
+    return params; // for event types not handled above only "engine" is set
+}
+
+namespace Polling {
+
+class SDLPoller : public InputCommon::Polling::DevicePoller {
+public:
+    explicit SDLPoller(SDLState& state_) : state(state_) {}
+
+    void Start() override {
+        state.event_queue.Clear(); // discard whatever is still queued
+        state.polling = true; // SDLEventWatcher now queues events instead of handling them
+    }
+
+    void Stop() override {
+        state.polling = false; // SDLEventWatcher handles events directly again
+    }
+
+protected:
+    SDLState& state;
+};
+
+class SDLButtonPoller final : public SDLPoller {
+public:
+    explicit SDLButtonPoller(SDLState& state_) : SDLPoller(state_) {}
+
+    /// Pops queued events until one can be bound as a button (button release, hat motion, or an
+    /// axis moved at least half way) and converts it; returns an empty package if there is none.
+    Common::ParamPackage GetNextInput() override {
+        SDL_Event event;
+        while (state.event_queue.Pop(event)) {
+            const bool is_button =
+                event.type == SDL_JOYBUTTONUP || event.type == SDL_JOYHATMOTION;
+            const bool is_axis = event.type == SDL_JOYAXISMOTION &&
+                                 std::abs(event.jaxis.value / 32767.0) >= 0.5;
+            if (is_button || is_axis) {
+                return SDLEventToButtonParamPackage(state, event);
+            }
+        }
+        return {};
+    }
+};
+
+class SDLAnalogPoller final : public SDLPoller {
+public:
+    explicit SDLAnalogPoller(SDLState& state_) : SDLPoller(state_) {}
+
+    void Start() override {
+        SDLPoller::Start();
+
+        // Reset stored axes
+        analog_xaxis = -1;
+        analog_yaxis = -1;
+        analog_axes_joystick = -1;
+    }
+    /// Returns an analog binding once two different axes of one joystick have moved, else empty
+    Common::ParamPackage GetNextInput() override {
+        SDL_Event event;
+        while (state.event_queue.Pop(event)) {
+            if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
+                continue;
+            }
+            // An analog device needs two axes, so we need to store the axis for later and wait for
+            // a second SDL event. The axes also must be from the same joystick.
+            int axis = event.jaxis.axis;
+            if (analog_xaxis == -1) {
+                analog_xaxis = axis;
+                analog_axes_joystick = event.jaxis.which;
+            } else if (analog_yaxis == -1 && analog_xaxis != axis &&
+                       analog_axes_joystick == event.jaxis.which) {
+                analog_yaxis = axis;
+            }
+        }
+        Common::ParamPackage params;
+        if (analog_xaxis != -1 && analog_yaxis != -1) {
+            // Use the stored joystick: the last popped event may belong to another device or type
+            auto joystick = state.GetSDLJoystickBySDLID(analog_axes_joystick);
+            params.Set("engine", "sdl");
+            params.Set("port", joystick->GetPort());
+            params.Set("guid", joystick->GetGUID());
+            params.Set("axis_x", analog_xaxis);
+            params.Set("axis_y", analog_yaxis);
+            analog_xaxis = -1;
+            analog_yaxis = -1;
+            analog_axes_joystick = -1;
+        }
+        return params;
+    }
+
+private:
+    int analog_xaxis = -1;
+    int analog_yaxis = -1;
+    SDL_JoystickID analog_axes_joystick = -1;
+};
+} // namespace Polling
+
+/// Builds the list of SDL pollers for `type`: one poller for analog or button devices, and an
+/// empty list for any other device type.
+SDLState::Pollers SDLState::GetPollers(InputCommon::Polling::DeviceType type) {
+    Pollers result;
+
+    if (type == InputCommon::Polling::DeviceType::Analog) {
+        result.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
+    } else if (type == InputCommon::Polling::DeviceType::Button) {
+        result.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
+    }
+
+    // Pollers keep a reference to this state object
+    return result;
+}
+
+} // namespace SDL
+} // namespace InputCommon
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
new file mode 100644
index 000000000..2579741d6
--- /dev/null
+++ b/src/input_common/sdl/sdl_impl.h
@@ -0,0 +1,63 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <thread>
+#include "common/threadsafe_queue.h"
+#include "input_common/sdl/sdl.h"
+
+union SDL_Event;
+using SDL_Joystick = struct _SDL_Joystick;
+using SDL_JoystickID = s32;
+
+namespace InputCommon::SDL {
+
+class SDLJoystick;
+class SDLButtonFactory;
+class SDLAnalogFactory;
+
+class SDLState : public State {
+public:
+    /// Initializes and registers SDL device factories
+    SDLState();
+
+    /// Unregisters SDL device factories and shuts them down.
+    ~SDLState() override;
+
+    /// Handle SDL_Events for joysticks from SDL_PollEvent
+    void HandleGameControllerEvent(const SDL_Event& event);
+    /// Look up the virtual joystick for an SDL instance id, or for a GUID and port
+    std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id);
+    std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);
+
+    /// Get all DevicePoller that use the SDL backend for a specific device type
+    Pollers GetPollers(Polling::DeviceType type) override;
+
+    /// Used by the Pollers during config
+    std::atomic<bool> polling = false;
+    Common::SPSCQueue<SDL_Event> event_queue;
+
+private:
+    void InitJoystick(int joystick_index);
+    void CloseJoystick(SDL_Joystick* sdl_joystick);
+
+    /// Needs to be called before SDL_QuitSubSystem.
+    void CloseJoysticks();
+
+    /// Map from a GUID to the list of corresponding virtual joysticks
+    std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
+    std::mutex joystick_map_mutex;
+
+    std::shared_ptr<SDLButtonFactory> button_factory; // NOTE(review): looks unused - confirm
+    std::shared_ptr<SDLAnalogFactory> analog_factory; // NOTE(review): looks unused - confirm
+
+    bool start_thread = false; // set when the joystick subsystem was not already initialized
+    std::atomic<bool> initialized = false; // the poll thread loops while this is true
+
+    std::thread poll_thread;
+};
+} // namespace InputCommon::SDL
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 37f09ce5f..c7038b217 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,4 +1,7 @@
 add_executable(tests
+    common/bit_field.cpp
+    common/bit_utils.cpp
+    common/multi_level_queue.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
     core/arm/arm_test_common.cpp
diff --git a/src/tests/common/bit_field.cpp b/src/tests/common/bit_field.cpp
new file mode 100644
index 000000000..8ca1889f9
--- /dev/null
+++ b/src/tests/common/bit_field.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <catch2/catch.hpp>
+#include "common/bit_field.h"
+
+TEST_CASE("BitField", "[common]") {
+    enum class TestEnum : u32 {
+        A = 0b10111101,
+        B = 0b10101110,
+        C = 0b00001111,
+    };
+
+    union LEBitField { // four fields laid over one little-endian 32-bit word
+        u32_le raw;
+        BitField<0, 6, u32> a;
+        BitField<6, 4, s32> b;
+        BitField<10, 8, TestEnum> c;
+        BitField<18, 14, u32> d;
+    } le_bitfield;
+
+    union BEBitField { // the same field layout over a big-endian 32-bit word
+        u32_be raw;
+        BitFieldBE<0, 6, u32> a;
+        BitFieldBE<6, 4, s32> b;
+        BitFieldBE<10, 8, TestEnum> c;
+        BitFieldBE<18, 14, u32> d;
+    } be_bitfield;
+
+    static_assert(sizeof(LEBitField) == sizeof(u32));
+    static_assert(sizeof(BEBitField) == sizeof(u32));
+    static_assert(std::is_trivially_copyable_v<LEBitField>);
+    static_assert(std::is_trivially_copyable_v<BEBitField>);
+
+    std::array<u8, 4> raw{{
+        0b01101100,
+        0b11110110,
+        0b10111010,
+        0b11101100,
+    }};
+
+    std::memcpy(&le_bitfield, &raw, sizeof(raw)); // both unions start from the same four bytes
+    std::memcpy(&be_bitfield, &raw, sizeof(raw));
+
+    // bit fields: 11101100101110'10111101'1001'101100
+    REQUIRE(le_bitfield.raw == 0b11101100'10111010'11110110'01101100);
+    REQUIRE(le_bitfield.a == 0b101100);
+    REQUIRE(le_bitfield.b == -7); // 1001 as two's complement
+    REQUIRE(le_bitfield.c == TestEnum::A);
+    REQUIRE(le_bitfield.d == 0b11101100101110);
+
+    le_bitfield.a.Assign(0b000111);
+    le_bitfield.b.Assign(-1);
+    le_bitfield.c.Assign(TestEnum::C);
+    le_bitfield.d.Assign(0b01010101010101);
+    std::memcpy(&raw, &le_bitfield, sizeof(raw)); // copy back to check the byte order in memory
+    // bit fields: 01010101010101'00001111'1111'000111
+    REQUIRE(le_bitfield.raw == 0b01010101'01010100'00111111'11000111);
+    REQUIRE(raw == std::array<u8, 4>{{
+                       0b11000111,
+                       0b00111111,
+                       0b01010100,
+                       0b01010101,
+                   }});
+
+    // bit fields: 01101100111101'10101110'1011'101100
+    REQUIRE(be_bitfield.raw == 0b01101100'11110110'10111010'11101100);
+    REQUIRE(be_bitfield.a == 0b101100);
+    REQUIRE(be_bitfield.b == -5); // 1011 as two's complement
+    REQUIRE(be_bitfield.c == TestEnum::B);
+    REQUIRE(be_bitfield.d == 0b01101100111101);
+
+    be_bitfield.a.Assign(0b000111);
+    be_bitfield.b.Assign(-1);
+    be_bitfield.c.Assign(TestEnum::C);
+    be_bitfield.d.Assign(0b01010101010101);
+    std::memcpy(&raw, &be_bitfield, sizeof(raw)); // same values, bytes expected in reverse order
+    // bit fields: 01010101010101'00001111'1111'000111
+    REQUIRE(be_bitfield.raw == 0b01010101'01010100'00111111'11000111);
+    REQUIRE(raw == std::array<u8, 4>{{
+                       0b01010101,
+                       0b01010100,
+                       0b00111111,
+                       0b11000111,
+                   }});
+}
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp
new file mode 100644
index 000000000..479b5995a
--- /dev/null
+++ b/src/tests/common/bit_utils.cpp
@@ -0,0 +1,23 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/bit_util.h"
+
+namespace Common {
+
+TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
+    REQUIRE(Common::CountTrailingZeroes32(0) == 32);
+    REQUIRE(Common::CountTrailingZeroes64(0) == 64);
+    REQUIRE(Common::CountTrailingZeroes32(9) == 0);
+    REQUIRE(Common::CountTrailingZeroes32(8) == 3);
+    REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12);
+    REQUIRE(Common::CountTrailingZeroes64(9) == 0);
+    REQUIRE(Common::CountTrailingZeroes64(8) == 3);
+    REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12);
+    REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36);
+}
+
+} // namespace Common
diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp
new file mode 100644
index 000000000..cca7ec7da
--- /dev/null
+++ b/src/tests/common/multi_level_queue.cpp
@@ -0,0 +1,55 @@
+// Copyright 2019 Yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/multi_level_queue.h"
+
+namespace Common {
+
+TEST_CASE("MultiLevelQueue", "[common]") {
+    std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
+    Common::MultiLevelQueue<f32, 64> mlq;
+    REQUIRE(mlq.empty());
+    mlq.add(values[2], 2);
+    mlq.add(values[7], 7);
+    mlq.add(values[3], 3);
+    mlq.add(values[4], 4);
+    mlq.add(values[0], 0);
+    mlq.add(values[5], 5);
+    mlq.add(values[6], 6);
+    mlq.add(values[1], 1);
+    u32 index = 0;
+    bool all_set = true;
+    for (auto& f : mlq) {
+        all_set &= (f == values[index]);
+        index++;
+    }
+    REQUIRE(all_set);
+    REQUIRE(!mlq.empty());
+    f32 v = 8.0;
+    mlq.add(v, 2);
+    v = -7.0;
+    mlq.add(v, 2, false);
+    REQUIRE(mlq.front(2) == -7.0);
+    mlq.yield(2);
+    REQUIRE(mlq.front(2) == values[2]);
+    REQUIRE(mlq.back(2) == -7.0);
+    REQUIRE(mlq.empty(8));
+    v = 10.0;
+    mlq.add(v, 8);
+    mlq.adjust(v, 8, 9);
+    REQUIRE(mlq.front(9) == v);
+    REQUIRE(mlq.empty(8));
+    REQUIRE(!mlq.empty(9));
+    mlq.adjust(values[0], 0, 9);
+    REQUIRE(mlq.highest_priority_set() == 1);
+    REQUIRE(mlq.lowest_priority_set() == 9);
+    mlq.remove(values[1], 1);
+    REQUIRE(mlq.highest_priority_set() == 2);
+    REQUIRE(mlq.empty(1));
+}
+
+} // namespace Common
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 9b8a44fa1..58af41f6e 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 
+#include "common/page_table.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
@@ -13,21 +14,20 @@
 namespace ArmTests {
 
 TestEnvironment::TestEnvironment(bool mutable_memory_)
-    : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
-
-    auto process = Kernel::Process::Create(kernel, "");
-    kernel.MakeCurrentProcess(process.get());
-    page_table = &Core::CurrentProcess()->VMManager().page_table;
+    : mutable_memory(mutable_memory_),
+      test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
+    auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
+    page_table = &process->VMManager().page_table;
 
     std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
     page_table->special_regions.clear();
     std::fill(page_table->attributes.begin(), page_table->attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 
     Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
     Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
 
-    Memory::SetCurrentPageTable(page_table);
+    kernel.MakeCurrentProcess(process.get());
 }
 
 TestEnvironment::~TestEnvironment() {
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/memory_hook.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/memory_hook.h"
 
-namespace Memory {
+namespace Common {
 struct PageTable;
 }
 
@@ -58,7 +58,7 @@ public:
 
 private:
     friend struct TestMemory;
-    struct TestMemory final : Memory::MemoryHook {
+    struct TestMemory final : Common::MemoryHook {
         explicit TestMemory(TestEnvironment* env_) : env(env_) {}
         TestEnvironment* env;
 
@@ -86,7 +86,7 @@ private:
     bool mutable_memory;
     std::shared_ptr<TestMemory> test_memory;
     std::vector<WriteRecord> write_records;
-    Memory::PageTable* page_table = nullptr;
+    Common::PageTable* page_table = nullptr;
     Kernel::KernelCore kernel;
 };
 
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 2242c14cf..340d6a272 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
     REQUIRE(lateness == cycles_late);
 }
 
-class ScopeInit final {
-public:
+struct ScopeInit final {
     ScopeInit() {
-        CoreTiming::Init();
+        core_timing.Initialize();
     }
     ~ScopeInit() {
-        CoreTiming::Shutdown();
+        core_timing.Shutdown();
     }
+
+    Core::Timing::CoreTiming core_timing;
 };
 
-static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
-                            int cpu_downcount = 0) {
+static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
+                            int expected_lateness = 0, int cpu_downcount = 0) {
     callbacks_ran_flags = 0;
     expected_callback = CB_IDS[idx];
     lateness = expected_lateness;
 
-    CoreTiming::AddTicks(CoreTiming::GetDowncount() -
-                         cpu_downcount); // Pretend we executed X cycles of instructions.
-    CoreTiming::Advance();
+    // Pretend we executed X cycles of instructions.
+    core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
+    core_timing.Advance();
 
     REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
-    REQUIRE(downcount == CoreTiming::GetDowncount());
+    REQUIRE(downcount == core_timing.GetDowncount());
 }
 
 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
+    auto& core_timing = guard.core_timing;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
-    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
-    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
+    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
+    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
+    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
+    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
+    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
 
     // Enter slice 0
-    CoreTiming::Advance();
+    core_timing.Advance();
 
     // D -> B -> C -> A -> E
-    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
-    REQUIRE(1000 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]);
-    REQUIRE(100 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]);
-    REQUIRE(100 == CoreTiming::GetDowncount());
-
-    AdvanceAndCheck(3, 400);
-    AdvanceAndCheck(1, 300);
-    AdvanceAndCheck(2, 200);
-    AdvanceAndCheck(0, 200);
-    AdvanceAndCheck(4, MAX_SLICE_LENGTH);
+    core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
+    REQUIRE(1000 == core_timing.GetDowncount());
+    core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
+    REQUIRE(500 == core_timing.GetDowncount());
+    core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
+    REQUIRE(500 == core_timing.GetDowncount());
+    core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
+    REQUIRE(100 == core_timing.GetDowncount());
+    core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
+    REQUIRE(100 == core_timing.GetDowncount());
+
+    AdvanceAndCheck(core_timing, 3, 400);
+    AdvanceAndCheck(core_timing, 1, 300);
+    AdvanceAndCheck(core_timing, 2, 200);
+    AdvanceAndCheck(core_timing, 0, 200);
+    AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
 }
 
 TEST_CASE("CoreTiming[Threadsave]", "[core]") {
     ScopeInit guard;
+    auto& core_timing = guard.core_timing;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
-    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
-    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
+    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
+    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
+    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
+    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
+    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
 
     // Enter slice 0
-    CoreTiming::Advance();
+    core_timing.Advance();
 
     // D -> B -> C -> A -> E
-    CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
+    core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(1000);
-    REQUIRE(1000 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
+    core_timing.ForceExceptionCheck(1000);
+    REQUIRE(1000 == core_timing.GetDowncount());
+    core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(500);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
+    core_timing.ForceExceptionCheck(500);
+    REQUIRE(500 == core_timing.GetDowncount());
+    core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(800);
-    REQUIRE(500 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
+    core_timing.ForceExceptionCheck(800);
+    REQUIRE(500 == core_timing.GetDowncount());
+    core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(100);
-    REQUIRE(100 == CoreTiming::GetDowncount());
-    CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
+    core_timing.ForceExceptionCheck(100);
+    REQUIRE(100 == core_timing.GetDowncount());
+    core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
     // Manually force since ScheduleEventThreadsafe doesn't call it
-    CoreTiming::ForceExceptionCheck(1200);
-    REQUIRE(100 == CoreTiming::GetDowncount());
-
-    AdvanceAndCheck(3, 400);
-    AdvanceAndCheck(1, 300);
-    AdvanceAndCheck(2, 200);
-    AdvanceAndCheck(0, 200);
-    AdvanceAndCheck(4, MAX_SLICE_LENGTH);
+    core_timing.ForceExceptionCheck(1200);
+    REQUIRE(100 == core_timing.GetDowncount());
+
+    AdvanceAndCheck(core_timing, 3, 400);
+    AdvanceAndCheck(core_timing, 1, 300);
+    AdvanceAndCheck(core_timing, 2, 200);
+    AdvanceAndCheck(core_timing, 0, 200);
+    AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
 }
 
 namespace SharedSlotTest {
@@ -142,59 +145,63 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
     using namespace SharedSlotTest;
 
     ScopeInit guard;
+    auto& core_timing = guard.core_timing;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>);
-    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>);
-    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>);
+    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
+    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
+    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
+    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
+    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
 
-    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
-    CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
-    CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]);
-    CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]);
-    CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]);
+    core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
+    core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
+    core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
+    core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
+    core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
 
     // Enter slice 0
-    CoreTiming::Advance();
-    REQUIRE(1000 == CoreTiming::GetDowncount());
+    core_timing.Advance();
+    REQUIRE(1000 == core_timing.GetDowncount());
 
     callbacks_ran_flags = 0;
     counter = 0;
     lateness = 0;
-    CoreTiming::AddTicks(CoreTiming::GetDowncount());
-    CoreTiming::Advance();
-    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
+    core_timing.AddTicks(core_timing.GetDowncount());
+    core_timing.Advance();
+    REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
     REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
 }
 
-TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
+TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
     ScopeInit guard;
+    auto& core_timing = guard.core_timing;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
+    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
+    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
 
     // Enter slice 0
-    CoreTiming::Advance();
+    core_timing.Advance();
 
-    CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]);
-    CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]);
+    core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
+    core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
 
-    AdvanceAndCheck(0, 90, 10, -10); // (100 - 10)
-    AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50);
+    AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
+    AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
 }
 
 namespace ChainSchedulingTest {
 static int reschedules = 0;
 
-static void RescheduleCallback(u64 userdata, s64 cycles_late) {
+static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
+                               s64 cycles_late) {
     --reschedules;
     REQUIRE(reschedules >= 0);
     REQUIRE(lateness == cycles_late);
 
-    if (reschedules > 0)
-        CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata),
+    if (reschedules > 0) {
+        core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
                                   userdata);
+    }
 }
 } // namespace ChainSchedulingTest
 
@@ -202,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
     using namespace ChainSchedulingTest;
 
     ScopeInit guard;
+    auto& core_timing = guard.core_timing;
 
-    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
-    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
-    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
-    CoreTiming::EventType* cb_rs =
-        CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback);
+    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
+    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
+    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
+    Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
+        "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
+            RescheduleCallback(core_timing, userdata, cycles_late);
+        });
 
     // Enter slice 0
-    CoreTiming::Advance();
+    core_timing.Advance();
 
-    CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]);
-    CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
-    CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]);
-    CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
-    REQUIRE(800 == CoreTiming::GetDowncount());
+    core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
+    core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
+    core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
+    core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
+    REQUIRE(800 == core_timing.GetDowncount());
 
     reschedules = 3;
-    AdvanceAndCheck(0, 200);  // cb_a
-    AdvanceAndCheck(1, 1000); // cb_b, cb_rs
+    AdvanceAndCheck(core_timing, 0, 200);  // cb_a
+    AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
     REQUIRE(2 == reschedules);
 
-    CoreTiming::AddTicks(CoreTiming::GetDowncount());
-    CoreTiming::Advance(); // cb_rs
+    core_timing.AddTicks(core_timing.GetDowncount());
+    core_timing.Advance(); // cb_rs
     REQUIRE(1 == reschedules);
-    REQUIRE(200 == CoreTiming::GetDowncount());
+    REQUIRE(200 == core_timing.GetDowncount());
 
-    AdvanceAndCheck(2, 800); // cb_c
+    AdvanceAndCheck(core_timing, 2, 800); // cb_c
 
-    CoreTiming::AddTicks(CoreTiming::GetDowncount());
-    CoreTiming::Advance(); // cb_rs
+    core_timing.AddTicks(core_timing.GetDowncount());
+    core_timing.Advance(); // cb_rs
     REQUIRE(0 == reschedules);
-    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
+    REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
 }
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 327db68a5..114bed20d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,18 +5,24 @@ add_library(video_core STATIC
     debug_utils/debug_utils.h
     engines/fermi_2d.cpp
     engines/fermi_2d.h
+    engines/kepler_compute.cpp
+    engines/kepler_compute.h
     engines/kepler_memory.cpp
     engines/kepler_memory.h
     engines/maxwell_3d.cpp
     engines/maxwell_3d.h
-    engines/maxwell_compute.cpp
-    engines/maxwell_compute.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
     engines/shader_bytecode.h
     engines/shader_header.h
     gpu.cpp
     gpu.h
+    gpu_asynch.cpp
+    gpu_asynch.h
+    gpu_synch.cpp
+    gpu_synch.h
+    gpu_thread.cpp
+    gpu_thread.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
@@ -44,6 +50,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_shader_cache.h
     renderer_opengl/gl_shader_decompiler.cpp
     renderer_opengl/gl_shader_decompiler.h
+    renderer_opengl/gl_shader_disk_cache.cpp
+    renderer_opengl/gl_shader_disk_cache.h
     renderer_opengl/gl_shader_gen.cpp
     renderer_opengl/gl_shader_gen.h
     renderer_opengl/gl_shader_manager.cpp
@@ -59,18 +67,83 @@ add_library(video_core STATIC
     renderer_opengl/renderer_opengl.h
     renderer_opengl/utils.cpp
     renderer_opengl/utils.h
+    shader/decode/arithmetic.cpp
+    shader/decode/arithmetic_immediate.cpp
+    shader/decode/bfe.cpp
+    shader/decode/bfi.cpp
+    shader/decode/shift.cpp
+    shader/decode/arithmetic_integer.cpp
+    shader/decode/arithmetic_integer_immediate.cpp
+    shader/decode/arithmetic_half.cpp
+    shader/decode/arithmetic_half_immediate.cpp
+    shader/decode/ffma.cpp
+    shader/decode/hfma2.cpp
+    shader/decode/conversion.cpp
+    shader/decode/memory.cpp
+    shader/decode/texture.cpp
+    shader/decode/float_set_predicate.cpp
+    shader/decode/integer_set_predicate.cpp
+    shader/decode/half_set_predicate.cpp
+    shader/decode/predicate_set_register.cpp
+    shader/decode/predicate_set_predicate.cpp
+    shader/decode/register_set_predicate.cpp
+    shader/decode/float_set.cpp
+    shader/decode/integer_set.cpp
+    shader/decode/half_set.cpp
+    shader/decode/video.cpp
+    shader/decode/xmad.cpp
+    shader/decode/other.cpp
+    shader/decode.cpp
+    shader/shader_ir.cpp
+    shader/shader_ir.h
+    shader/track.cpp
     surface.cpp
     surface.h
     textures/astc.cpp
     textures/astc.h
+    textures/convert.cpp
+    textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.h
+    texture_cache.cpp
+    texture_cache.h
     video_core.cpp
     video_core.h
 )
 
+if (ENABLE_VULKAN)
+    target_sources(video_core PRIVATE
+        renderer_vulkan/declarations.h
+        renderer_vulkan/maxwell_to_vk.cpp
+        renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/vk_buffer_cache.cpp
+        renderer_vulkan/vk_buffer_cache.h
+        renderer_vulkan/vk_device.cpp
+        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_memory_manager.cpp
+        renderer_vulkan/vk_memory_manager.h
+        renderer_vulkan/vk_resource_manager.cpp
+        renderer_vulkan/vk_resource_manager.h
+        renderer_vulkan/vk_sampler_cache.cpp
+        renderer_vulkan/vk_sampler_cache.h
+        renderer_vulkan/vk_scheduler.cpp
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_shader_decompiler.cpp
+        renderer_vulkan/vk_shader_decompiler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h
+        renderer_vulkan/vk_swapchain.cpp
+        renderer_vulkan/vk_swapchain.h)
+
+    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
+    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
+endif()
+
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad)
+if (ENABLE_VULKAN)
+    target_link_libraries(video_core PRIVATE sirit)
+endif()
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5ffb492ea..f0ef67535 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -10,7 +10,7 @@ namespace Tegra {
 
 void DebugContext::DoOnEvent(Event event, void* data) {
     {
-        std::unique_lock<std::mutex> lock(breakpoint_mutex);
+        std::unique_lock lock{breakpoint_mutex};
 
         // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
         // show on debug widgets
@@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
 
 void DebugContext::Resume() {
     {
-        std::lock_guard<std::mutex> lock(breakpoint_mutex);
+        std::lock_guard lock{breakpoint_mutex};
 
         // Tell all observers that we are about to resume
         for (auto& breakpoint_observer : breakpoint_observers) {
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index c235faf46..ac3a2eb01 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -40,7 +40,7 @@ public:
         /// Constructs the object such that it observes events of the given DebugContext.
         explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
             : context_weak(debug_context) {
-            std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
+            std::unique_lock lock{debug_context->breakpoint_mutex};
             debug_context->breakpoint_observers.push_back(this);
         }
 
@@ -48,7 +48,7 @@ public:
             auto context = context_weak.lock();
             if (context) {
                 {
-                    std::unique_lock<std::mutex> lock(context->breakpoint_mutex);
+                    std::unique_lock lock{context->breakpoint_mutex};
                     context->breakpoint_observers.remove(this);
                 }
 
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 63a958f11..046d047cb 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -8,6 +8,7 @@
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -33,16 +34,33 @@ void DmaPusher::DispatchCalls() {
 }
 
 bool DmaPusher::Step() {
-    if (dma_get != dma_put) {
-        // Push buffer non-empty, read a word
-        const CommandHeader command_header{
-            Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))};
+    if (!ib_enable || dma_pushbuffer.empty()) {
+        // IB mode disabled or no command lists queued - nothing to do
+        return false;
+    }
 
-        dma_get += sizeof(u32);
+    const CommandList& command_list{dma_pushbuffer.front()};
+    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
+    GPUVAddr dma_get = command_list_header.addr;
+    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
+    bool non_main = command_list_header.is_non_main;
 
-        if (!non_main) {
-            dma_mget = dma_get;
-        }
+    if (dma_pushbuffer_subindex >= command_list.size()) {
+        // We've gone through the current list, remove it from the queue
+        dma_pushbuffer.pop();
+        dma_pushbuffer_subindex = 0;
+    }
+
+    if (command_list_header.size == 0) {
+        return true;
+    }
+
+    // Push buffer non-empty, read all of its command words in one block
+    command_headers.resize(command_list_header.size);
+    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+                                  command_list_header.size * sizeof(u32));
+
+    for (const CommandHeader& command_header : command_headers) {
 
         // now, see if we're in the middle of a command
         if (dma_state.length_pending) {
@@ -89,22 +107,11 @@ bool DmaPusher::Step() {
                 break;
             }
         }
-    } else if (ib_enable && !dma_pushbuffer.empty()) {
-        // Current pushbuffer empty, but we have more IB entries to read
-        const CommandList& command_list{dma_pushbuffer.front()};
-        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
-        dma_get = command_list_header.addr;
-        dma_put = dma_get + command_list_header.size * sizeof(u32);
-        non_main = command_list_header.is_non_main;
-
-        if (dma_pushbuffer_subindex >= command_list.size()) {
-            // We've gone through the current list, remove it from the queue
-            dma_pushbuffer.pop();
-            dma_pushbuffer_subindex = 0;
-        }
-    } else {
-        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
-        return {};
+    }
+
+    if (!non_main) {
+        // TODO (degasus): This is dead code, as dma_mget is never read.
+        dma_mget = dma_put;
     }
 
     return true;
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 16e0697c4..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -75,6 +74,8 @@ private:
 
     GPU& gpu;
 
+    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
+
     std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
     std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer
 
@@ -83,17 +84,14 @@ private:
         u32 subchannel;        ///< Current subchannel
         u32 method_count;      ///< Current method count
         u32 length_pending;    ///< Large NI command length pending
-        bool non_incrementing; ///< Current command�s NI flag
+        bool non_incrementing; ///< Current command's NI flag
     };
 
     DmaState dma_state{};
     bool dma_increment_once{};
 
-    GPUVAddr dma_put{};   ///< pushbuffer current end address
-    GPUVAddr dma_get{};   ///< pushbuffer current read address
     GPUVAddr dma_mget{};  ///< main pushbuffer last read address
     bool ib_enable{true}; ///< IB mode enabled
-    bool non_main{};      ///< non-main pushbuffer active
 };
 
 } // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 80f70e332..55966eef1 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,17 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core.h"
-#include "core/memory.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
 #include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
 Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : rasterizer{rasterizer}, memory_manager{memory_manager} {}
 
 void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     regs.reg_array[method_call.method] = method_call.argument;
 
     switch (method_call.method) {
-    case FERMI2D_REG_INDEX(trigger): {
+    // Trigger the surface copy on the last register write. That register is blit_src_y, which is
+    // 64 bits wide, so trigger on the write to its second 32-bit word.
+    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
         HandleSurfaceCopy();
         break;
     }
@@ -32,55 +34,23 @@ void Fermi2D::HandleSurfaceCopy() {
     LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
                 static_cast<u32>(regs.operation));
 
-    const GPUVAddr source = regs.src.Address();
-    const GPUVAddr dest = regs.dst.Address();
-
-    // TODO(Subv): Only same-format and same-size copies are allowed for now.
-    ASSERT(regs.src.format == regs.dst.format);
-    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
-
     // TODO(Subv): Only raw copies are implemented.
     ASSERT(regs.operation == Regs::Operation::SrcCopy);
 
-    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
-    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
-
-    u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
-    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
-
-    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
-        // All copies here update the main memory, so mark all rasterizer states as invalid.
-        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
+    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
+    const u32 src_blit_x2{
+        static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
+    const u32 src_blit_y2{
+        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
 
-        rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu,
-                                    dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
+    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                          regs.blit_dst_x + regs.blit_dst_width,
+                                          regs.blit_dst_y + regs.blit_dst_height};
 
-        if (regs.src.linear == regs.dst.linear) {
-            // If the input layout and the output layout are the same, just perform a raw copy.
-            ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
-            Memory::CopyBlock(dest_cpu, source_cpu,
-                              src_bytes_per_pixel * regs.dst.width * regs.dst.height);
-            return;
-        }
-        u8* src_buffer = Memory::GetPointer(source_cpu);
-        u8* dst_buffer = Memory::GetPointer(dest_cpu);
-        if (!regs.src.linear && regs.dst.linear) {
-            // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
-                                      dst_buffer, true, regs.src.BlockHeight(),
-                                      regs.src.BlockDepth(), 0);
-        } else {
-            // If the input is linear and the output is tiled, swizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
-                                      src_buffer, false, regs.dst.BlockHeight(),
-                                      regs.dst.BlockDepth(), 0);
-        }
+    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
+        UNIMPLEMENTED();
     }
 }
 
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 50009bf75..2e51b7f13 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,12 +5,15 @@
 #pragma once
 
 #include <array>
-#include "common/assert.h"
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -94,21 +97,30 @@ public:
 
                 Operation operation;
 
-                INSERT_PADDING_WORDS(0x9);
+                INSERT_PADDING_WORDS(0x177);
+
+                u32 blit_control;
+
+                INSERT_PADDING_WORDS(0x8);
 
-                // TODO(Subv): This is only a guess.
-                u32 trigger;
+                u32 blit_dst_x;
+                u32 blit_dst_y;
+                u32 blit_dst_width;
+                u32 blit_dst_height;
+                u64 blit_du_dx;
+                u64 blit_dv_dy;
+                u64 blit_src_x;
+                u64 blit_src_y;
 
-                INSERT_PADDING_WORDS(0x1A3);
+                INSERT_PADDING_WORDS(0x21);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
     } regs{};
 
-    MemoryManager& memory_manager;
-
 private:
     VideoCore::RasterizerInterface& rasterizer;
+    MemoryManager& memory_manager;
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
@@ -122,7 +134,16 @@ private:
 ASSERT_REG_POSITION(dst, 0x80);
 ASSERT_REG_POSITION(src, 0x8C);
 ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(trigger, 0xB5);
+ASSERT_REG_POSITION(blit_control, 0x223);
+ASSERT_REG_POSITION(blit_dst_x, 0x22c);
+ASSERT_REG_POSITION(blit_dst_y, 0x22d);
+ASSERT_REG_POSITION(blit_dst_width, 0x22e);
+ASSERT_REG_POSITION(blit_dst_height, 0x22f);
+ASSERT_REG_POSITION(blit_du_dx, 0x230);
+ASSERT_REG_POSITION(blit_dv_dy, 0x232);
+ASSERT_REG_POSITION(blit_src_x, 0x234);
+ASSERT_REG_POSITION(blit_src_y, 0x236);
+
 #undef ASSERT_REG_POSITION
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
new file mode 100644
index 000000000..b1d950460
--- /dev/null
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -0,0 +1,33 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Engines {
+
+KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+
+KeplerCompute::~KeplerCompute() = default;
+
+void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
+    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+               "Invalid KeplerCompute register, increase the size of the Regs structure");
+
+    regs.reg_array[method_call.method] = method_call.argument;
+
+    switch (method_call.method) {
+    case KEPLER_COMPUTE_REG_INDEX(launch):
+        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
+        // kernels)
+        UNREACHABLE_MSG("Compute shaders are not implemented");
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h
index 1d71f11bd..fb6cdf432 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,52 +5,56 @@
 #pragma once
 
 #include <array>
-#include "common/assert.h"
-#include "common/bit_field.h"
+#include <cstddef>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 
+namespace Tegra {
+class MemoryManager;
+}
+
 namespace Tegra::Engines {
 
-#define MAXWELL_COMPUTE_REG_INDEX(field_name)                                                      \
-    (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32))
+#define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
+    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
 
-class MaxwellCompute final {
+class KeplerCompute final {
 public:
-    MaxwellCompute() = default;
-    ~MaxwellCompute() = default;
+    explicit KeplerCompute(MemoryManager& memory_manager);
+    ~KeplerCompute();
+
+    static constexpr std::size_t NumConstBuffers = 8;
 
     struct Regs {
         static constexpr std::size_t NUM_REGS = 0xCF8;
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x281);
+                INSERT_PADDING_WORDS(0xAF);
 
-                union {
-                    u32 compute_end;
-                    BitField<0, 1, u32> unknown;
-                } compute;
+                u32 launch;
 
-                INSERT_PADDING_WORDS(0xA76);
+                INSERT_PADDING_WORDS(0xC48);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
     } regs{};
-
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
-                  "MaxwellCompute Regs has wrong size");
+                  "KeplerCompute Regs has wrong size");
 
     /// Write the value to the register identified by method.
     void CallMethod(const GPU::MethodCall& method_call);
+
+private:
+    MemoryManager& memory_manager;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4,                      \
+    static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(compute, 0x281);
+ASSERT_REG_POSITION(launch, 0xAF);
 
 #undef ASSERT_REG_POSITION
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4880191fc..cd51a31d7 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -2,18 +2,20 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 
 namespace Tegra::Engines {
 
-KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
+KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                            MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
 
 KeplerMemory::~KeplerMemory() = default;
 
@@ -39,17 +41,14 @@ void KeplerMemory::ProcessData(u32 data) {
     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
 
-    GPUVAddr address = regs.dest.Address();
-    VAddr dest_address =
-        *memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
-
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might contain
-    // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(dest_address, sizeof(u32));
+    // We do this before actually writing the new data because the destination address might
+    // contain a dirty surface that will have to be written back to memory.
+    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
+    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+    memory_manager.Write<u32>(address, data);
 
-    Memory::Write32(dest_address, data);
-    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
     state.write_offset++;
 }
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index fe9ebc5b9..78b6c3e45 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,12 +5,19 @@
 #pragma once
 
 #include <array>
-#include "common/assert.h"
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -23,7 +30,8 @@ namespace Tegra::Engines {
 
 class KeplerMemory final {
 public:
-    KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                 MemoryManager& memory_manager);
     ~KeplerMemory();
 
     /// Write the value to the register identified by method.
@@ -76,8 +84,9 @@ public:
     } state{};
 
 private:
-    MemoryManager& memory_manager;
+    Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
+    MemoryManager& memory_manager;
 
     void ProcessData(u32 data);
 };
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d64a5080b..74403eed4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,11 +7,10 @@
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "core/memory.h"
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Engines {
@@ -19,8 +18,10 @@ namespace Tegra::Engines {
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;
 
-Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
+Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                     MemoryManager& memory_manager)
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
+                                                                                  *this} {
     InitializeRegisterDefaults();
 }
 
@@ -37,6 +38,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
         regs.viewports[viewport].depth_range_near = 0.0f;
         regs.viewports[viewport].depth_range_far = 1.0f;
     }
+
     // Doom and Bomberman seems to use the uninitialized registers and just enable blend
     // so initialize blend registers with sane values
     regs.blend.equation_rgb = Regs::Blend::Equation::Add;
@@ -66,6 +68,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.stencil_back_func_func = Regs::ComparisonOp::Always;
     regs.stencil_back_func_mask = 0xFFFFFFFF;
     regs.stencil_back_mask = 0xFFFFFFFF;
+
     // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
     // register carrying a default value. Assume it's OpenGL's default (1).
     regs.point_size = 1.0f;
@@ -78,6 +81,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
         regs.color_mask[color_mask].B.Assign(1);
         regs.color_mask[color_mask].A.Assign(1);
     }
+
+    // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
+    regs.rt_separate_frag_data = 1;
 }
 
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
@@ -98,23 +104,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 }
 
 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
-    auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+    auto debug_context = system.GetGPUDebugContext();
+
+    const u32 method = method_call.method;
 
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
-        ASSERT(method_call.method == executing_macro + 1);
+        ASSERT(method == executing_macro + 1);
     }
 
     // Methods after 0xE00 are special, they're actually triggers for some microcode that was
     // uploaded to the GPU during initialization.
-    if (method_call.method >= MacroRegistersStart) {
+    if (method >= MacroRegistersStart) {
         // We're trying to execute a macro
         if (executing_macro == 0) {
             // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method_call.method % 2) == 0,
+            ASSERT_MSG((method % 2) == 0,
                        "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method_call.method;
+            executing_macro = method;
         }
 
         macro_params.push_back(method_call.argument);
@@ -126,66 +134,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         return;
     }
 
-    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+    ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Maxwell3D register, increase the size of the Regs structure");
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
     }
 
-    if (regs.reg_array[method_call.method] != method_call.argument) {
-        regs.reg_array[method_call.method] = method_call.argument;
+    if (regs.reg_array[method] != method_call.argument) {
+        regs.reg_array[method] = method_call.argument;
         // Color buffers
         constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
         constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method_call.method >= first_rt_reg &&
-            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+        if (method >= first_rt_reg &&
+            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+            dirty_flags.color_buffer.set(rt_index);
         }
 
         // Zeta buffer
         constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+            method == MAXWELL3D_REG_INDEX(zeta_width) ||
+            method == MAXWELL3D_REG_INDEX(zeta_height) ||
+            (method >= MAXWELL3D_REG_INDEX(zeta) &&
+             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
             dirty_flags.zeta_buffer = true;
         }
 
         // Shader
         constexpr u32 shader_registers_count =
             sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
             dirty_flags.shaders = true;
         }
 
         // Vertex format
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method_call.method <
-                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
             dirty_flags.vertex_attrib_format = true;
         }
 
         // Vertex buffer
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
-            dirty_flags.vertex_array |=
-                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
         }
     }
 
-    switch (method_call.method) {
+    switch (method) {
     case MAXWELL3D_REG_INDEX(macros.data): {
         ProcessMacroUpload(method_call.argument);
         break;
@@ -245,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         ProcessQueryGet();
         break;
     }
+    case MAXWELL3D_REG_INDEX(sync_info): {
+        ProcessSyncPoint();
+        break;
+    }
     default:
         break;
     }
@@ -265,10 +273,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }
 
 void Maxwell3D::ProcessQueryGet() {
-    GPUVAddr sequence_address = regs.query.QueryAddress();
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
     // VAddr before writing.
-    std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
 
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -303,7 +310,7 @@ void Maxwell3D::ProcessQueryGet() {
             // Write the current query sequence to the sequence address.
             // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
             // query.
-            Memory::Write32(*address, sequence);
+            memory_manager.Write<u32>(sequence_address, sequence);
         } else {
             // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
             // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -311,8 +318,8 @@ void Maxwell3D::ProcessQueryGet() {
             LongQueryResult query_result{};
             query_result.value = result;
             // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-            query_result.timestamp = CoreTiming::GetTicks();
-            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+            query_result.timestamp = system.CoreTiming().GetTicks();
+            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
         }
         dirty_flags.OnMemoryWrite();
         break;
@@ -323,12 +330,20 @@ void Maxwell3D::ProcessQueryGet() {
     }
 }
 
+void Maxwell3D::ProcessSyncPoint() {
+    const u32 sync_point = regs.sync_info.sync_point.Value();
+    const u32 increment = regs.sync_info.increment.Value();
+    const u32 cache_flush = regs.sync_info.unknown.Value();
+    LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
+              cache_flush);
+}
+
 void Maxwell3D::DrawArrays() {
     LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
               regs.vertex_buffer.count);
     ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
-    auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+    auto debug_context = system.GetGPUDebugContext();
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
@@ -381,16 +396,18 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 
 void Maxwell3D::ProcessCBData(u32 value) {
     // Write the input value to the current const buffer at the current position.
-    GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
+    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
     ASSERT(buffer_address != 0);
 
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
 
-    std::optional<VAddr> address =
-        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+
+    u8* ptr{memory_manager.GetPointer(address)};
+    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
+    memory_manager.Write<u32>(address, value);
 
-    Memory::Write32(*address, value);
     dirty_flags.OnMemoryWrite();
 
     // Increment the current buffer position.
@@ -398,22 +415,19 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    GPUVAddr tic_base_address = regs.tic.TICAddress();
-
-    GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
 
     Texture::TICEntry tic_entry;
-    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
                "TIC versions other than BlockLinear or Pitch are unimplemented");
 
-    auto r_type = tic_entry.r_type.Value();
-    auto g_type = tic_entry.g_type.Value();
-    auto b_type = tic_entry.b_type.Value();
-    auto a_type = tic_entry.a_type.Value();
+    const auto r_type = tic_entry.r_type.Value();
+    const auto g_type = tic_entry.g_type.Value();
+    const auto b_type = tic_entry.b_type.Value();
+    const auto a_type = tic_entry.a_type.Value();
 
     // TODO(Subv): Different data types for separate components are not supported
     ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -422,13 +436,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }
 
 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
-
-    GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
 
     Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
@@ -447,8 +458,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
-        Texture::TextureHandle tex_handle{
-            Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
+        const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -457,23 +467,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
             sizeof(Texture::TextureHandle);
 
         // Load the TIC data.
-        if (tex_handle.tic_id != 0) {
-            tex_info.enabled = true;
-
-            auto tic_entry = GetTICEntry(tex_handle.tic_id);
-            // TODO(Subv): Workaround for BitField's move constructor being deleted.
-            std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-        }
+        auto tic_entry = GetTICEntry(tex_handle.tic_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
 
         // Load the TSC data
-        if (tex_handle.tsc_id != 0) {
-            auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-            // TODO(Subv): Workaround for BitField's move constructor being deleted.
-            std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-        }
+        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
 
-        if (tex_info.enabled)
-            textures.push_back(tex_info);
+        textures.push_back(tex_info);
     }
 
     return textures;
@@ -485,31 +488,25 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
     auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
-    GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+    const GPUVAddr tex_info_address =
+        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
 
     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
 
-    std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
 
     Texture::FullTextureInfo tex_info{};
     tex_info.index = static_cast<u32>(offset);
 
     // Load the TIC data.
-    if (tex_handle.tic_id != 0) {
-        tex_info.enabled = true;
-
-        auto tic_entry = GetTICEntry(tex_handle.tic_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-    }
+    auto tic_entry = GetTICEntry(tex_handle.tic_id);
+    // TODO(Subv): Workaround for BitField's move constructor being deleted.
+    std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
 
     // Load the TSC data
-    if (tex_handle.tsc_id != 0) {
-        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-    }
+    auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+    // TODO(Subv): Workaround for BitField's move constructor being deleted.
+    std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
 
     return tex_info;
 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1f76aa670..321af3297 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
 #pragma once
 
 #include <array>
+#include <bitset>
 #include <unordered_map>
 #include <vector>
+
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -14,9 +16,16 @@
 #include "common/math_util.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
-#include "video_core/memory_manager.h"
 #include "video_core/textures/texture.h"
 
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -28,7 +37,8 @@ namespace Tegra::Engines {
 
 class Maxwell3D final {
 public:
-    explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                       MemoryManager& memory_manager);
     ~Maxwell3D() = default;
 
     /// Register structure of the Maxwell3D engine.
@@ -498,7 +508,7 @@ public:
             f32 translate_z;
             INSERT_PADDING_WORDS(2);
 
-            MathUtil::Rectangle<s32> GetRect() const {
+            Common::Rectangle<s32> GetRect() const {
                 return {
                     GetX(),               // left
                     GetY() + GetHeight(), // top
@@ -569,7 +579,17 @@ public:
                     u32 bind;
                 } macros;
 
-                INSERT_PADDING_WORDS(0x188);
+                INSERT_PADDING_WORDS(0x69);
+
+                struct { // One u32 register at index 0xB2 (checked by ASSERT_REG_POSITION)
+                    union {
+                        BitField<0, 16, u32> sync_point; // bits 0-15
+                        BitField<16, 1, u32> unknown;    // bit 16 (purpose unknown, per its name)
+                        BitField<20, 1, u32> increment;  // bit 20
+                    };
+                } sync_info;
+
+                INSERT_PADDING_WORDS(0x11E);
 
                 u32 tfb_enabled;
 
@@ -1086,22 +1106,20 @@ public:
     };
 
     State state{};
-    MemoryManager& memory_manager;
 
     struct DirtyFlags {
-        u8 color_buffer = 0xFF;
-        bool zeta_buffer = true;
-
-        bool shaders = true;
+        std::bitset<8> color_buffer{0xFF};        // all 8 bits start set
+        std::bitset<32> vertex_array{0xFFFFFFFF}; // all 32 bits start set
 
         bool vertex_attrib_format = true;
-        u32 vertex_array = 0xFFFFFFFF;
+        bool zeta_buffer = true;
+        bool shaders = true;
 
         void OnMemoryWrite() {
-            color_buffer = 0xFF;
             zeta_buffer = true;
             shaders = true;
-            vertex_array = 0xFFFFFFFF;
+            color_buffer.set(); // bitset::set() without arguments raises every bit
+            vertex_array.set(); // note: vertex_attrib_format is left untouched by this function
         }
     };
 
@@ -1131,8 +1149,12 @@ public:
 private:
     void InitializeRegisterDefaults();
 
+    Core::System& system;
+
     VideoCore::RasterizerInterface& rasterizer;
 
+    MemoryManager& memory_manager;
+
     /// Start offsets of each macro in macro_memory
     std::unordered_map<u32, u32> macro_offsets;
 
@@ -1172,6 +1194,9 @@ private:
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
+    /// Handles writes to the syncing register.
+    void ProcessSyncPoint();
+
     /// Handles a write to the CB_DATA[i] register.
     void ProcessCBData(u32 value);
 
@@ -1187,6 +1212,7 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
deleted file mode 100644
index 656db6a61..000000000
--- a/src/video_core/engines/maxwell_compute.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_compute.h"
-
-namespace Tegra::Engines {
-
-void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
-    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
-               "Invalid MaxwellCompute register, increase the size of the Regs structure");
-
-    regs.reg_array[method_call.method] = method_call.argument;
-
-    switch (method_call.method) {
-    case MAXWELL_COMPUTE_REG_INDEX(compute): {
-        LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
-        UNREACHABLE();
-        break;
-    }
-    default:
-        break;
-    }
-}
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 06462f570..2426d0067 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,17 +2,21 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
+#include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 #include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
-MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                       MemoryManager& memory_manager) // only binds the three references
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
 
 void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
     ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -39,9 +43,6 @@ void MaxwellDMA::HandleCopy() {
     const GPUVAddr source = regs.src_address.Address();
     const GPUVAddr dest = regs.dst_address.Address();
 
-    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
-    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
-
     // TODO(Subv): Perform more research and implement all features of this engine.
     ASSERT(regs.exec.enable_swizzle == 0);
     ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -57,14 +58,14 @@ void MaxwellDMA::HandleCopy() {
     }
 
     // All copies here update the main memory, so mark all rasterizer states as invalid.
-    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
     if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
         // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
         // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
         // y_count).
         if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count);
+            memory_manager.CopyBlock(dest, source, regs.x_count);
             return;
         }
 
@@ -73,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
         // rectangle. There is no need to manually flush/invalidate the regions because
         // CopyBlock does that for us.
         for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = dest_cpu + line * regs.dst_pitch;
-            Memory::CopyBlock(dest_line, source_line, regs.x_count);
+            const GPUVAddr source_line = source + line * regs.src_pitch;
+            const GPUVAddr dest_line = dest + line * regs.dst_pitch;
+            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
         }
         return;
     }
@@ -84,15 +85,28 @@ void MaxwellDMA::HandleCopy() {
 
     const std::size_t copy_size = regs.x_count * regs.y_count;
 
+    auto source_ptr{memory_manager.GetPointer(source)};
+    auto dst_ptr{memory_manager.GetPointer(dest)};
+
+    if (!source_ptr) {
+        LOG_ERROR(HW_GPU, "source_ptr is invalid");
+        return;
+    }
+
+    if (!dst_ptr) {
+        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
+        return;
+    }
+
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        rasterizer.FlushRegion(source_cpu, src_size);
+        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
 
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu, dst_size);
+        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
     };
 
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,7 +119,7 @@ void MaxwellDMA::HandleCopy() {
                            copy_size * src_bytes_per_pixel);
 
         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
+                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
                                   regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                   regs.src_params.pos_y);
     } else {
@@ -119,7 +133,7 @@ void MaxwellDMA::HandleCopy() {
 
         // If the input is linear and the output is tiled, swizzle the input and copy it over.
         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
     }
 }
 
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 1f8cd65d2..c6b649842 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,12 +5,19 @@
 #pragma once
 
 #include <array>
-#include "common/assert.h"
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -20,7 +27,8 @@ namespace Tegra::Engines {
 
 class MaxwellDMA final {
 public:
-    explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                        MemoryManager& memory_manager);
     ~MaxwellDMA() = default;
 
     /// Write the value to the register identified by method.
@@ -134,11 +142,13 @@ public:
         };
     } regs{};
 
-    MemoryManager& memory_manager;
-
 private:
+    Core::System& system;
+
     VideoCore::RasterizerInterface& rasterizer;
 
+    MemoryManager& memory_manager;
+
     /// Performs the copy from the source buffer to the destination buffer as configured in the
     /// registers.
     void HandleCopy();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e53c77f2b..2e1e96c81 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
 
 #include <bitset>
 #include <optional>
-#include <string>
 #include <tuple>
 #include <vector>
 
@@ -186,7 +185,7 @@ enum class SubOp : u64 {
 };
 
 enum class F2iRoundingOp : u64 {
-    None = 0,
+    RoundEven = 0,
     Floor = 1,
     Ceil = 2,
     Trunc = 3,
@@ -208,6 +207,8 @@ enum class UniformType : u64 {
     SignedShort = 3,
     Single = 4,
     Double = 5,
+    Quad = 6,
+    UnsignedQuad = 7,
 };
 
 enum class StoreType : u64 {
@@ -215,9 +216,9 @@ enum class StoreType : u64 {
     Signed8 = 1,
     Unsigned16 = 2,
     Signed16 = 3,
-    Bytes32 = 4,
-    Bytes64 = 5,
-    Bytes128 = 6,
+    Bits32 = 4,
+    Bits64 = 5,
+    Bits128 = 6,
 };
 
 enum class IMinMaxExchange : u64 {
@@ -323,11 +324,11 @@ enum class TextureQueryType : u64 {
 
 enum class TextureProcessMode : u64 {
     None = 0,
-    LZ = 1,  // Unknown, appears to be the same as none.
+    LZ = 1,  // Load LOD of zero.
     LB = 2,  // Load Bias.
-    LL = 3,  // Load LOD (LevelOfDetail)
-    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
-    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL
+    LL = 3,  // Load LOD.
+    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
+    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL.
 };
 
 enum class TextureMiscMode : u64 {
@@ -374,9 +375,9 @@ enum class R2pMode : u64 {
 };
 
 enum class IpaInterpMode : u64 {
-    Linear = 0,
-    Perspective = 1,
-    Flat = 2,
+    Pass = 0,
+    Multiply = 1,
+    Constant = 2,
     Sc = 3,
 };
 
@@ -397,6 +398,10 @@ struct IpaMode {
     bool operator!=(const IpaMode& a) const {
         return !operator==(a);
     }
+    bool operator<(const IpaMode& a) const { // orders by interpolation_mode, then sampling_mode
+        return std::tie(interpolation_mode, sampling_mode) <
+               std::tie(a.interpolation_mode, a.sampling_mode); // tuple (lexicographic) compare
+    }
 };
 
 enum class SystemVariable : u64 {
@@ -644,6 +649,7 @@ union Instruction {
             BitField<37, 2, HalfPrecision> precision;
             BitField<32, 1, u64> saturate;
 
+            BitField<31, 1, u64> negate_b;
             BitField<30, 1, u64> negate_c;
             BitField<35, 2, HalfType> type_c;
         } rr;
@@ -780,6 +786,12 @@ union Instruction {
     } st_l;
 
     union {
+        BitField<48, 3, UniformType> type;      // bits 48-50
+        BitField<46, 2, u64> cache_mode;        // bits 46-47
+        BitField<20, 24, s64> immediate_offset; // bits 20-43, signed
+    } ldg; // presumably the operand view used when decoding Id::LDG - confirm against the decoder
+
+    union {
         BitField<0, 3, u64> pred0;
         BitField<3, 3, u64> pred3;
         BitField<7, 1, u64> abs_a;
@@ -968,6 +980,10 @@ union Instruction {
             }
             return false;
         }
+
+        bool IsComponentEnabled(std::size_t component) const { // tests one bit of component_mask
+            return ((1ULL << component) & component_mask) != 0;
+        }
     } txq;
 
     union {
@@ -1222,24 +1238,35 @@ union Instruction {
 
     union {
         BitField<20, 16, u64> imm20_16;
+        BitField<35, 1, u64> high_b_rr; // bit 35; used on RR
         BitField<36, 1, u64> product_shift_left;
         BitField<37, 1, u64> merge_37;
         BitField<48, 1, u64> sign_a;
         BitField<49, 1, u64> sign_b;
+        BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC; omits bit 52, which is high_b
         BitField<50, 3, XmadMode> mode;
         BitField<52, 1, u64> high_b;
         BitField<53, 1, u64> high_a;
+        BitField<55, 1, u64> product_shift_left_second; // bit 55; used on CR
         BitField<56, 1, u64> merge_56;
     } xmad;
 
     union {
         BitField<20, 14, u64> offset;
         BitField<34, 5, u64> index;
+
+        u64 GetOffset() const {
+            return offset * 4; // 14-bit field scaled by 4; presumably words -> bytes (confirm)
+        }
     } cbuf34;
 
     union {
         BitField<20, 16, s64> offset;
         BitField<36, 5, u64> index;
+
+        s64 GetOffset() const {
+            return offset; // signed 16-bit field returned unscaled, unlike cbuf34::GetOffset
+        }
     } cbuf36;
 
     // Unsure about the size of this one.
@@ -1421,6 +1448,7 @@ public:
         Flow,
         Synch,
         Memory,
+        Texture,
         FloatSet,
         FloatSetPredicate,
         IntegerSet,
@@ -1431,6 +1459,7 @@ public:
         PredicateSetRegister,
         RegisterSetPredicate,
         Conversion,
+        Video,
         Xmad,
         Unknown,
     };
@@ -1550,20 +1579,20 @@ private:
             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
-            INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
-            INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
-            INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
-            INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
-            INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
-            INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
-            INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
-            INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
+            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
+            INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
+            INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
+            INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
+            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
+            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
             INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
             INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
-            INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
-            INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"),
+            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
+            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
             INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
             INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
             INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -1636,7 +1665,7 @@ private:
             INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
             INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
             INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
-            INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+            INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
             INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
             INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
             INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index 99c34649f..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
     TriangleStrip = 7,
 };
 
+enum class AttributeUse : u8 { // 2-bit usage code of one component in ps.imap_generic_vector
+    Unused = 0, // skipped when Header::ps.GetAttributeUse() merges the components
+    Constant = 1,
+    Perspective = 2, // wins when GetAttributeUse() sees conflicting component uses
+    ScreenLinear = 3,
+};
+
 // Documentation in:
 // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 struct Header {
@@ -84,9 +91,15 @@ struct Header {
         } vtg;
 
         struct {
-            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+            union { // ImapGenericVector entry: one 2-bit AttributeUse per component, x at bit 0
+                BitField<0, 2, AttributeUse> x;
+                BitField<2, 2, AttributeUse> y;
+                BitField<4, 2, AttributeUse> z; // index 2 in GetAttributeIndexUse()
+                BitField<6, 2, AttributeUse> w; // index 3 in GetAttributeIndexUse()
+                u8 raw;
+            } imap_generic_vector[32];
             INSERT_PADDING_BYTES(2);  // ImapColor
             INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
             INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,10 +116,32 @@ struct Header {
                 const u32 bit = render_target * 4 + component;
                 return omap.target & (1 << bit);
             }
+            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
+                return static_cast<AttributeUse>( // 2-bit field number `index` of the raw byte
+                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
+            }
+            AttributeUse GetAttributeUse(u32 attribute) const { // merges the 4 per-component uses
+                AttributeUse result = AttributeUse::Unused;
+                for (u32 i = 0; i < 4; i++) {
+                    const auto index = GetAttributeIndexUse(attribute, i);
+                    if (index == AttributeUse::Unused) {
+                        continue; // unused components never influence the result
+                    }
+                    if (result == AttributeUse::Unused || result == index) {
+                        result = index; // first used component, or one agreeing with the result
+                        continue;
+                    }
+                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
+                    if (index == AttributeUse::Perspective) {
+                        result = index; // on conflict, Perspective takes precedence
+                    }
+                }
+                return result;
+            }
         } ps;
     };
 
-    u64 GetLocalMemorySize() {
+    u64 GetLocalMemorySize() const { // low-size field OR'ed with the high-size field << 24
         return (common1.shader_local_memory_low_size |
                 (common2.shader_local_memory_high_size << 24));
     }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08cf6268f..4461083ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,19 +3,24 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"
 
 namespace Tegra {
 
 u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
     switch (format) {
     case PixelFormat::ABGR8:
+    case PixelFormat::BGRA8:
         return 4;
     default:
         return 4;
@@ -24,14 +29,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
     UNREACHABLE();
 }
 
-GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
-    memory_manager = std::make_unique<Tegra::MemoryManager>();
+GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
+    auto& rasterizer{renderer.Rasterizer()}; // passed to the memory manager and most engines
+    memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
-    maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
     fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
-    maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
-    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
 }
 
 GPU::~GPU() = default;
@@ -124,9 +130,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
     }
 }
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in docs. Here we do not multiply by 4,
+// so the values you see in docs might be these values multiplied by 4.
 enum class BufferMethods {
-    BindObject = 0,
-    CountBufferMethods = 0x40,
+    BindObject = 0x0, // dispatched to GPU::ProcessBindMethod
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7, // dispatched to GPU::ProcessSemaphoreTriggerMethod
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    Unk2c = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A, // dispatched to GPU::ProcessSemaphoreAcquire
+    SemaphoreRelease = 0x1B, // dispatched to GPU::ProcessSemaphoreRelease
+    Unk70 = 0x1C,
+    Unk74 = 0x1D,
+    Unk78 = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40, // methods >= this value go to the bound engine, not the puller
 };
+
+enum class GpuSemaphoreOperation { // decoded from the low 4 bits of regs.semaphore_trigger
+    AcquireEqual = 0x1,
+    WriteLong = 0x2,
+    AcquireGequal = 0x4,
+    AcquireMask = 0x8,
+};
 
 void GPU::CallMethod(const MethodCall& method_call) {
@@ -135,20 +168,78 @@ void GPU::CallMethod(const MethodCall& method_call) {
 
     ASSERT(method_call.subchannel < bound_engines.size());
 
-    if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-                  method_call.argument);
-        bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
-        return;
+    if (ExecuteMethodOnEngine(method_call)) {
+        CallEngineMethod(method_call);
+    } else {
+        CallPullerMethod(method_call);
     }
+}
+
+bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
+    const auto method = static_cast<BufferMethods>(method_call.method);
+    return method >= BufferMethods::NonPullerMethods; // false => handled by CallPullerMethod
+}
 
-    if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
-        // TODO(Subv): Research and implement these methods.
-        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
-        return;
+void GPU::CallPullerMethod(const MethodCall& method_call) {
+    regs.reg_array[method_call.method] = method_call.argument; // stored for every puller method
+    const auto method = static_cast<BufferMethods>(method_call.method);
+
+    switch (method) {
+    case BufferMethods::BindObject: {
+        ProcessBindMethod(method_call);
+        break;
+    }
+    case BufferMethods::Nop:
+    case BufferMethods::SemaphoreAddressHigh:
+    case BufferMethods::SemaphoreAddressLow:
+    case BufferMethods::SemaphoreSequence:
+    case BufferMethods::RefCnt:
+        break; // nothing to do here beyond the register store above
+    case BufferMethods::SemaphoreTrigger: {
+        ProcessSemaphoreTriggerMethod();
+        break;
+    }
+    case BufferMethods::NotifyIntr: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+        break;
+    }
+    case BufferMethods::WrcacheFlush: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
+        break;
+    }
+    case BufferMethods::Unk28: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+        break;
+    }
+    case BufferMethods::Unk2c: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
+        break;
+    }
+    case BufferMethods::SemaphoreAcquire: {
+        ProcessSemaphoreAcquire();
+        break;
     }
+    case BufferMethods::SemaphoreRelease: {
+        ProcessSemaphoreRelease();
+        break;
+    }
+    case BufferMethods::Yield: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+        break;
+    }
+    default: // any other method below NonPullerMethods is only logged
+        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
+                  static_cast<u32>(method));
+        break;
+    }
+}
 
+void GPU::CallEngineMethod(const MethodCall& method_call) {
     const EngineID engine = bound_engines[method_call.subchannel];
 
     switch (engine) {
@@ -158,8 +249,8 @@ void GPU::CallMethod(const MethodCall& method_call) {
     case EngineID::MAXWELL_B:
         maxwell_3d->CallMethod(method_call);
         break;
-    case EngineID::MAXWELL_COMPUTE_B:
-        maxwell_compute->CallMethod(method_call);
+    case EngineID::KEPLER_COMPUTE_B:
+        kepler_compute->CallMethod(method_call);
         break;
     case EngineID::MAXWELL_DMA_COPY_A:
         maxwell_dma->CallMethod(method_call);
@@ -172,4 +263,72 @@ void GPU::CallMethod(const MethodCall& method_call) {
     }
 }
 
+void GPU::ProcessBindMethod(const MethodCall& method_call) {
+    // Associate the subchannel named in the call with the engine id carried in its argument.
+    const auto engine_id = method_call.argument;
+    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, engine_id);
+    bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+}
+
+void GPU::ProcessSemaphoreTriggerMethod() {
+    // The operation to perform is encoded in the low four bits of semaphore_trigger.
+    constexpr u32 op_mask = 0xF;
+    const auto op = static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & op_mask);
+    if (op == GpuSemaphoreOperation::WriteLong) {
+        // Payload written to the semaphore address: sequence, a zero word and a timestamp.
+        struct Block {
+            u32 sequence;
+            u32 zeros = 0;
+            u64 timestamp;
+        };
+        Block block{};
+        block.sequence = regs.semaphore_sequence;
+        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
+        // CoreTiming
+        block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
+        memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
+                                   sizeof(block));
+    } else {
+        const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
+            (op == GpuSemaphoreOperation::AcquireGequal &&
+             static_cast<s32>(word - regs.semaphore_sequence) >= 0) ||
+            (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
+            // The acquire condition already holds (Gequal includes equality): nothing to do
+        } else {
+            regs.acquire_source = true;
+            regs.acquire_value = regs.semaphore_sequence;
+            if (op == GpuSemaphoreOperation::AcquireEqual) {
+                regs.acquire_active = true;
+                regs.acquire_mode = false;
+            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+                regs.acquire_active = true;
+                regs.acquire_mode = true;
+            } else if (op == GpuSemaphoreOperation::AcquireMask) {
+                // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
+                // semaphore_sequence, gives a non-0 result
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
+            } else {
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+            }
+        }
+    }
+}
+
+void GPU::ProcessSemaphoreRelease() {
+    memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
+}
+
+void GPU::ProcessSemaphoreAcquire() {
+    const u32 expected = regs.semaphore_acquire;
+    if (memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()) != expected) {
+        // The semaphore does not hold the requested value: mark an acquire as active
+        // TODO(kemathe73) figure out how to do the acquire_timeout
+        regs.acquire_source = false;
+        regs.acquire_mode = false;
+        regs.acquire_value = expected;
+        regs.acquire_active = true;
+    }
+}
+
 } // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index af5ccd1e9..de30ea354 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,16 +6,23 @@
 
 #include <array>
 #include <memory>
-#include <vector>
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/dma_pusher.h"
-#include "video_core/memory_manager.h"
 
-namespace VideoCore {
-class RasterizerInterface;
+using CacheAddr = std::uintptr_t;
+inline CacheAddr ToCacheAddr(const void* host_ptr) {
+    return reinterpret_cast<CacheAddr>(host_ptr);
+}
+
+namespace Core {
+class System;
 }
 
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
 namespace Tegra {
 
 enum class RenderTargetFormat : u32 {
@@ -80,6 +87,7 @@ class DebugContext;
 struct FramebufferConfig {
     enum class PixelFormat : u32 {
         ABGR8 = 1,
+        BGRA8 = 5,
     };
 
     /**
@@ -96,29 +104,32 @@ struct FramebufferConfig {
 
     using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
     TransformFlags transform_flags;
-    MathUtil::Rectangle<int> crop_rect;
+    Common::Rectangle<int> crop_rect;
 };
 
 namespace Engines {
 class Fermi2D;
 class Maxwell3D;
-class MaxwellCompute;
 class MaxwellDMA;
+class KeplerCompute;
 class KeplerMemory;
 } // namespace Engines
 
 enum class EngineID {
     FERMI_TWOD_A = 0x902D, // 2D Engine
     MAXWELL_B = 0xB197,    // 3D Engine
-    MAXWELL_COMPUTE_B = 0xB1C0,
+    KEPLER_COMPUTE_B = 0xB1C0,
     KEPLER_INLINE_TO_MEMORY_B = 0xA140,
     MAXWELL_DMA_COPY_A = 0xB0B5,
 };
 
-class GPU final {
+class MemoryManager;
+
+class GPU {
 public:
-    explicit GPU(VideoCore::RasterizerInterface& rasterizer);
-    ~GPU();
+    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
+
+    virtual ~GPU();
 
     struct MethodCall {
         u32 method{};
@@ -156,23 +167,115 @@ public:
     /// Returns a const reference to the GPU DMA pusher.
     const Tegra::DmaPusher& DmaPusher() const;
 
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x100;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x4);
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    GPUVAddr SemaphoreAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } semaphore_address;
+
+                u32 semaphore_sequence;
+                u32 semaphore_trigger;
+                INSERT_PADDING_WORDS(0xC);
+
+                // The pusher and the puller share the reference counter; the pusher only has read
+                // access
+                u32 reference_count;
+                INSERT_PADDING_WORDS(0x5);
+
+                u32 semaphore_acquire;
+                u32 semaphore_release;
+                INSERT_PADDING_WORDS(0xE4);
+
+                // Puller state
+                u32 acquire_mode;
+                u32 acquire_source;
+                u32 acquire_active;
+                u32 acquire_timeout;
+                u32 acquire_value;
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    /// Push GPU command entries to be processed
+    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+
+    /// Swap buffers (render frame)
+    virtual void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+
 private:
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreAcquire();
+
+    /// Calls a GPU puller method.
+    void CallPullerMethod(const MethodCall& method_call);
+
+    /// Calls a GPU engine method.
+    void CallEngineMethod(const MethodCall& method_call);
+
+    /// Determines where the method should be executed.
+    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+
+protected:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    VideoCore::RendererBase& renderer;
+
+private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
-    /// Mapping of command subchannels to their bound engine ids.
+    /// Mapping of command subchannels to their bound engine ids
     std::array<EngineID, 8> bound_engines = {};
-
     /// 3D engine
     std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
     /// 2D engine
     std::unique_ptr<Engines::Fermi2D> fermi_2d;
     /// Compute engine
-    std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
     /// DMA engine
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
 };
 
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(semaphore_address, 0x4);
+ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+ASSERT_REG_POSITION(reference_count, 0x14);
+ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+ASSERT_REG_POSITION(semaphore_release, 0x1B);
+
+ASSERT_REG_POSITION(acquire_mode, 0x100);
+ASSERT_REG_POSITION(acquire_source, 0x101);
+ASSERT_REG_POSITION(acquire_active, 0x102);
+ASSERT_REG_POSITION(acquire_timeout, 0x103);
+ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+
 } // namespace Tegra
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..db507cf04
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
+
+GPUAsynch::~GPUAsynch() = default;
+
+void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    gpu_thread.SubmitList(std::move(entries));
+}
+
+void GPUAsynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    gpu_thread.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+    gpu_thread.FlushRegion(addr, size);
+}
+
+void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+    gpu_thread.InvalidateRegion(addr, size);
+}
+
+void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+    gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..1dcc61a6c
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+namespace GPUThread {
+class ThreadManager;
+} // namespace GPUThread
+
+/// Implementation of GPU interface that runs the GPU asynchronously
+class GPUAsynch : public Tegra::GPU {
+public:
+    explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUAsynch() override;
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+
+private:
+    GPUThread::ThreadManager gpu_thread;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..2cfc900ed
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_synch.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer) {}
+
+GPUSynch::~GPUSynch() = default;
+
+void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    dma_pusher->Push(std::move(entries));
+    dma_pusher->DispatchCalls();
+}
+
+void GPUSynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    renderer.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
+    renderer.Rasterizer().FlushRegion(addr, size);
+}
+
+void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
+    renderer.Rasterizer().InvalidateRegion(addr, size);
+}
+
+void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+    renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..766b5631c
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+/// Implementation of GPU interface that runs the GPU synchronously
+class GPUSynch : public Tegra::GPU {
+public:
+    explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUSynch() override;
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..cc56cf467
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,121 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
+#include "core/frontend/scope_acquire_window_context.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon::GPUThread {
+
+/// Runs the GPU thread
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
+    MicroProfileOnThreadCreate("GpuThread");
+
+    // Wait for first GPU command before acquiring the window context
+    state.WaitForCommands();
+
+    // If emulation was stopped during disk shader loading, abort before trying to acquire context
+    if (!state.is_running) {
+        return;
+    }
+
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    CommandDataContainer next;
+    while (state.is_running) {
+        state.WaitForCommands();
+        while (!state.queue.Empty()) {
+            state.queue.Pop(next);
+            if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+                dma_pusher.Push(std::move(submit_list->entries));
+                dma_pusher.DispatchCalls();
+            } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
+                renderer.SwapBuffers(std::move(data->framebuffer));
+            } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
+                renderer.Rasterizer().FlushRegion(data->addr, data->size);
+            } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
+                renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+            } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
+                return; // Shutdown requested: leave without signaling this command's fence
+            } else {
+                UNREACHABLE();
+            }
+            state.signaled_fence = next.fence;
+            state.TrySynchronize();
+        }
+    }
+}
+
+ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+                             Tegra::DmaPusher& dma_pusher)
+    : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
+    synchronization_event = system.CoreTiming().RegisterEvent(
+        "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
+}
+
+ThreadManager::~ThreadManager() {
+    // Notify GPU thread that a shutdown is pending
+    PushCommand(EndProcessingCommand());
+    thread.join();
+}
+
+void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
+    const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
+    const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
+    system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
+}
+
+void ThreadManager::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+}
+
+void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+    PushCommand(FlushRegionCommand(addr, size));
+}
+
+void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+    if (state.queue.Empty()) {
+        // It's quicker to invalidate a single region on the CPU if the queue is already empty
+        system.Renderer().Rasterizer().InvalidateRegion(addr, size);
+    } else {
+        PushCommand(InvalidateRegionCommand(addr, size));
+    }
+}
+
+void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
+    InvalidateRegion(addr, size);
+}
+
+u64 ThreadManager::PushCommand(CommandData&& command_data) {
+    const u64 fence{++state.last_fence};
+    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
+    state.SignalCommands();
+    return fence;
+}
+
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+void SynchState::WaitForSynchronization(u64 fence) {
+    if (signaled_fence >= fence) {
+        return;
+    }
+
+    // Block until the GPU thread has signaled a fence at or beyond the requested one
+    {
+        MICROPROFILE_SCOPE(GPU_wait);
+        std::unique_lock<std::mutex> lock{synchronization_mutex};
+        synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
+    }
+}
+
+} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..62bcea5bb
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,173 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <variant>
+
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+struct FramebufferConfig;
+class DmaPusher;
+} // namespace Tegra
+
+namespace Core {
+class System;
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
+
+namespace VideoCommon::GPUThread {
+
+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
+/// Command to signal to the GPU thread that a command list is ready for processing
+struct SubmitListCommand final {
+    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+
+    Tegra::CommandList entries;
+};
+
+/// Command to signal to the GPU thread that a swap buffers is pending
+struct SwapBuffersCommand final {
+    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
+        : framebuffer{std::move(framebuffer)} {}
+
+    std::optional<Tegra::FramebufferConfig> framebuffer;
+};
+
+/// Command to signal to the GPU thread to flush a region
+struct FlushRegionCommand final {
+    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    CacheAddr addr;
+    u64 size;
+};
+
+/// Command to signal to the GPU thread to invalidate a region
+struct InvalidateRegionCommand final {
+    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    CacheAddr addr;
+    u64 size;
+};
+
+/// Command to signal to the GPU thread to flush and invalidate a region
+struct FlushAndInvalidateRegionCommand final {
+    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
+        : addr{addr}, size{size} {}
+
+    CacheAddr addr;
+    u64 size;
+};
+
+using CommandData =
+    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Pairs a command with the fence value it was constructed with
+struct CommandDataContainer {
+    CommandDataContainer() = default;
+    CommandDataContainer(CommandData&& data, u64 next_fence)
+        : data{std::move(data)}, fence{next_fence} {}
+
+    CommandDataContainer& operator=(const CommandDataContainer& t) {
+        data = t.data; // t is const, so the payload can only be copied, not moved from
+        fence = t.fence;
+        return *this;
+    }
+
+    CommandData data;
+    u64 fence{};
+};
+
+/// Struct used to synchronize the GPU thread
+struct SynchState final {
+    std::atomic_bool is_running{true};
+    std::atomic_int queued_frame_count{};
+    std::mutex synchronization_mutex;
+    std::mutex commands_mutex;
+    std::condition_variable commands_condition;
+    std::condition_variable synchronization_condition;
+
+    /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
+    /// synchronized. This is entirely empirical.
+    bool IsSynchronized() const {
+        constexpr std::size_t max_queue_gap{5};
+        return queue.Size() <= max_queue_gap;
+    }
+
+    void TrySynchronize() {
+        if (IsSynchronized()) {
+            std::lock_guard<std::mutex> lock{synchronization_mutex};
+            synchronization_condition.notify_one();
+        }
+    }
+
+    void WaitForSynchronization(u64 fence);
+
+    void SignalCommands() {
+        if (!queue.Empty()) {
+            // Lock so the wake-up cannot fall between the waiter's predicate check and wait
+            std::lock_guard<std::mutex> lock{commands_mutex};
+            commands_condition.notify_one();
+        }
+    }
+
+    void WaitForCommands() {
+        std::unique_lock lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    CommandQueue queue;
+    u64 last_fence{};
+    std::atomic<u64> signaled_fence{};
+};
+
+/// Class used to manage the GPU thread
+class ThreadManager final {
+public:
+    explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher);
+    ~ThreadManager();
+
+    /// Push GPU command entries to be processed
+    void SubmitList(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(CacheAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(CacheAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+
+private:
+    /// Pushes a command to be executed by the GPU thread
+    u64 PushCommand(CommandData&& command_data);
+
+private:
+    SynchState state;
+    Core::System& system;
+    Core::Timing::EventType* synchronization_event{};
+    std::thread thread;
+    std::thread::id thread_id;
+};
+
+} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 64f75db43..524d9ea5a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
 }
 
 u32 MacroInterpreter::FetchParameter() {
-    ASSERT(next_parameter_index < parameters.size());
-    return parameters[next_parameter_index++];
+    return parameters.at(next_parameter_index++);
 }
 
 u32 MacroInterpreter::GetRegister(u32 register_id) const {
-    // Register 0 is supposed to always return 0.
-    if (register_id == 0)
-        return 0;
-
-    ASSERT(register_id < registers.size());
-    return registers[register_id];
+    return registers.at(register_id);
 }
 
 void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
-    // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
-    // register.
-    if (register_id == 0)
+    // Register 0 is hardwired as the zero register.
+    // Ensure no writes to it actually occur.
+    if (register_id == 0) {
         return;
+    }
 
-    ASSERT(register_id < registers.size());
-    registers[register_id] = value;
+    registers.at(register_id) = value;
 }
 
 void MacroInterpreter::SetMethodAddress(u32 address) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 47247f097..0f4e820aa 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,180 +5,528 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
 
 namespace Tegra {
 
-MemoryManager::MemoryManager() {
-    // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
-    // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
-    // Undertale using 0 for a render target.
-    PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
+MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
+    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+              Common::PageType::Unmapped);
+    page_table.Resize(address_space_width);
+
+    // Initialize the map with a single free region covering the entire managed space.
+    VirtualMemoryArea initial_vma;
+    initial_vma.size = address_space_end;
+    vma_map.emplace(initial_vma.base, initial_vma);
+
+    UpdatePageTableForVMA(initial_vma);
 }
 
 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
 
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+    AllocateMemory(gpu_addr, 0, aligned_size);
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
+    return gpu_addr;
+}
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
 
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    AllocateMemory(gpu_addr, 0, aligned_size);
 
-    return *gpu_addr;
+    return gpu_addr;
 }
 
-GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
 
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    return gpu_addr;
+}
+
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & page_mask) == 0);
+
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
 
     return gpu_addr;
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
+GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & page_mask) == 0);
+
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
+
+    rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
+    UnmapRange(gpu_addr, aligned_size);
 
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+    return gpu_addr;
+}
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
+    // Find the first unmapped VMA holding `size` free bytes at or after `region_start`.
+    const VMAHandle vma_handle{
+        std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
+            const VirtualMemoryArea& area{vma.second};
+            const GPUVAddr area_end{area.base + area.size};
+            const GPUVAddr usable_start{std::max(region_start, area.base)};
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+            return area.type == VirtualMemoryArea::Type::Unmapped && area_end > region_start &&
+                   usable_start + size <= area_end;
+        })};
 
-        slot = cpu_addr + offset;
+    if (vma_handle == vma_map.end()) {
+        return {};
     }
 
-    const MappedRegion region{cpu_addr, *gpu_addr, size};
-    mapped_regions.push_back(region);
+    return std::max(region_start, vma_handle->second.base);
+}
 
-    return *gpu_addr;
+bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
+    return (addr >> page_bits) < page_table.pointers.size();
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-    if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
-        // Page has been already mapped. In this case, we must find a new area of memory to use that
-        // is different than the specified one. Super Mario Odyssey hits this scenario when changing
-        // areas, but we do not want to overwrite the old pages.
-        // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+    const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+    if (cpu_addr) {
+        return cpu_addr + (addr & page_mask);
+    }
+
+    return {};
+}
 
-        LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) const {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-        const std::optional<GPUVAddr> new_gpu_addr{
-            FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        T value;
+        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        return value;
+    }
 
-        ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
+        return 0;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+    return {};
+}
 
-        gpu_addr = *new_gpu_addr;
+template <typename T>
+void MemoryManager::Write(GPUVAddr addr, T data) {
+    if (!IsAddressValid(addr)) {
+        return;
     }
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        return;
+    }
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+                  static_cast<u32>(data), addr);
+        return;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
 
-        slot = cpu_addr + offset;
+template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
+template u16 MemoryManager::Read<u16>(GPUVAddr addr) const;
+template u32 MemoryManager::Read<u32>(GPUVAddr addr) const;
+template u64 MemoryManager::Read<u64>(GPUVAddr addr) const;
+template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
+template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
+template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
+template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
     }
 
-    const MappedRegion region{cpu_addr, gpu_addr, size};
-    mapped_regions.push_back(region);
+    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer != nullptr) {
+        return page_pointer + (addr & page_mask);
+    }
 
-    return gpu_addr;
+    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
+    return {};
 }
 
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
+const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer != nullptr) {
+        return page_pointer + (addr & page_mask);
+    }
 
-        ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
-               slot != static_cast<u64>(PageStatus::Unmapped));
+    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
+    return {};
+}
 
-        slot = static_cast<u64>(PageStatus::Unmapped);
-    }
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
+    std::size_t remaining_size{size};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+        switch (page_table.attributes[page_index]) {
+        case Common::PageType::Memory: {
+            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            break;
+        }
+        default:
+            UNREACHABLE();
+        }
 
-    // Delete the region mappings that are contained within the unmapped region
-    mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
-                                        [&](const MappedRegion& region) {
-                                            return region.gpu_addr <= gpu_addr &&
-                                                   region.gpu_addr + region.size < gpu_addr + size;
-                                        }),
-                         mapped_regions.end());
-    return gpu_addr;
+        page_index++;
+        page_offset = 0;
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+        remaining_size -= copy_amount;
+    }
 }
 
-GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
-    for (const auto& region : mapped_regions) {
-        const GPUVAddr region_end{region.gpu_addr + region.size};
-        if (region_start >= region.gpu_addr && region_start < region_end) {
-            return region_end;
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+    std::size_t remaining_size{size};
+    std::size_t page_index{dest_addr >> page_bits};
+    std::size_t page_offset{dest_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+        switch (page_table.attributes[page_index]) {
+        case Common::PageType::Memory: {
+            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
+            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            break;
         }
+        default:
+            UNREACHABLE();
+        }
+
+        page_index++;
+        page_offset = 0;
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+        remaining_size -= copy_amount;
     }
-    return {};
 }
 
-std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                                     PageStatus status) {
-    GPUVAddr gpu_addr{region_start};
-    u64 free_space{};
-    align = (align + PAGE_MASK) & ~PAGE_MASK;
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+    std::size_t remaining_size{size};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+        switch (page_table.attributes[page_index]) {
+        case Common::PageType::Memory: {
+            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+            WriteBlock(dest_addr, src_ptr, copy_amount);
+            break;
+        }
+        default:
+            UNREACHABLE();
+        }
 
-    while (gpu_addr + free_space < MAX_ADDRESS) {
-        if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
-            free_space += PAGE_SIZE;
-            if (free_space >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += free_space + PAGE_SIZE;
-            free_space = 0;
-            gpu_addr = Common::AlignUp(gpu_addr, align);
+        page_index++;
+        page_offset = 0;
+        dest_addr += static_cast<VAddr>(copy_amount);
+        src_addr += static_cast<VAddr>(copy_amount);
+        remaining_size -= copy_amount;
+    }
+}
+
+void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+                             VAddr backing_addr) {
+    LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
+              (base + size) * page_size);
+
+    const VAddr end{base + size};
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base + page_table.pointers.size());
+
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
+
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+        std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
+                  backing_addr);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
+            page_table.backing_addr[base] = backing_addr;
+
+            base += 1;
+            memory += page_size;
+            backing_addr += page_size;
         }
     }
+}
 
-    return {};
+void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+    MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
 }
 
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
-    const VAddr base_addr{PageSlot(gpu_addr)};
+void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+    MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
+}
 
-    if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
-        base_addr == static_cast<u64>(PageStatus::Unmapped)) {
+bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
+    ASSERT(base + size == next.base);
+    if (type != next.type) {
+        return {};
+    }
+    if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
+        return {};
+    }
+    if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
         return {};
     }
+    return true;
+}
+
+MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
+    if (target >= address_space_end) {
+        return vma_map.end();
+    } else {
+        return std::prev(vma_map.upper_bound(target));
+    }
+}
+
+MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    vma.type = VirtualMemoryArea::Type::Allocated;
+    vma.backing_addr = 0;
+    vma.backing_memory = {};
+    UpdatePageTableForVMA(vma);
+
+    return MergeAdjacent(vma_handle);
+}
+
+MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
+                                                       u64 size) {
+
+    // This is the appropriately sized VMA that will turn into our allocation.
+    VMAIter vma_handle{CarveVMA(target, size)};
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    ASSERT(vma.size == size);
+
+    vma.offset = offset;
+
+    return Allocate(vma_handle);
+}
+
+MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
+                                                         VAddr backing_addr) {
+    // This is the appropriately sized VMA that will turn into our allocation.
+    VMAIter vma_handle{CarveVMA(target, size)};
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    ASSERT(vma.size == size);
+
+    vma.type = VirtualMemoryArea::Type::Mapped;
+    vma.backing_memory = memory;
+    vma.backing_addr = backing_addr;
+    UpdatePageTableForVMA(vma);
+
+    return MergeAdjacent(vma_handle);
+}
+
+void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
+    VMAIter vma{CarveVMARange(target, size)};
+    const VAddr target_end{target + size};
+    const VMAIter end{vma_map.end()};
+
+    // The comparison against the end of the range must be done using addresses since VMAs can be
+    // merged during this process, causing invalidation of the iterators.
+    while (vma != end && vma->second.base < target_end) {
+        // Unmapped ranges return to allocated state and can be reused
+        // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
+        vma = std::next(Allocate(vma));
+    }
+
+    ASSERT(FindVMA(target)->second.size >= size);
+}
 
-    return base_addr + (gpu_addr & PAGE_MASK);
+MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
+    // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
+    // non-const access to its container.
+    return vma_map.erase(iter, iter); // Erases an empty range of elements
 }
 
-std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
-    std::vector<GPUVAddr> results;
-    for (const auto& region : mapped_regions) {
-        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
-            const u64 offset{cpu_addr - region.cpu_addr};
-            results.push_back(region.gpu_addr + offset);
+MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
+
+    VMAIter vma_handle{StripIterConstness(FindVMA(base))};
+    if (vma_handle == vma_map.end()) {
+        // Target address is outside the managed range
+        return {};
+    }
+
+    const VirtualMemoryArea& vma{vma_handle->second};
+    if (vma.type == VirtualMemoryArea::Type::Mapped) {
+        // Region is already mapped; return its VMA as-is without carving
+        return vma_handle;
+    }
+
+    const VAddr start_in_vma{base - vma.base};
+    const VAddr end_in_vma{start_in_vma + size};
+
+    ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
+               vma.size, end_in_vma);
+
+    if (end_in_vma < vma.size) {
+        // Split VMA at the end of the allocated region
+        SplitVMA(vma_handle, end_in_vma);
+    }
+    if (start_in_vma != 0) {
+        // Split VMA at the start of the allocated region
+        vma_handle = SplitVMA(vma_handle, start_in_vma);
+    }
+
+    return vma_handle;
+}
+
+MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+    ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
+
+    const VAddr target_end{target + size};
+    ASSERT(target_end >= target);
+    ASSERT(size > 0);
+    // Fail with end() if target is outside the managed space or any VMA in range is unmapped.
+    VMAIter begin_vma{StripIterConstness(FindVMA(target))};
+    const VMAIter i_end{vma_map.lower_bound(target_end)};
+    if (begin_vma == vma_map.end() || std::any_of(begin_vma, i_end, [](const auto& entry) {
+            return entry.second.type == VirtualMemoryArea::Type::Unmapped;
+        })) {
+        return vma_map.end();
+    }
+
+    if (target != begin_vma->second.base) {
+        begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+    }
+
+    VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
+    if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+        end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+    }
+
+    return begin_vma;
+}
+
+MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
+    VirtualMemoryArea& old_vma{vma_handle->second};
+    VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
+
+    // No-op splits (at a VMA boundary) are disallowed for now because they are probably a bug.
+    ASSERT(offset_in_vma < old_vma.size);
+    ASSERT(offset_in_vma > 0);
+
+    old_vma.size = offset_in_vma;
+    new_vma.base += offset_in_vma;
+    new_vma.size -= offset_in_vma;
+
+    switch (new_vma.type) {
+    case VirtualMemoryArea::Type::Unmapped:
+        break;
+    case VirtualMemoryArea::Type::Allocated:
+        new_vma.offset += offset_in_vma;
+        break;
+    case VirtualMemoryArea::Type::Mapped:
+        new_vma.backing_memory += offset_in_vma;
+        new_vma.backing_addr += offset_in_vma; // CPU address must follow the new base too
+        break;
+    }
+
+    ASSERT(old_vma.CanBeMergedWith(new_vma));
+
+    return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
+}
+
+MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
+    const VMAIter next_vma{std::next(iter)};
+    if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
+        iter->second.size += next_vma->second.size;
+        vma_map.erase(next_vma);
+    }
+
+    if (iter != vma_map.begin()) {
+        VMAIter prev_vma{std::prev(iter)};
+        if (prev_vma->second.CanBeMergedWith(iter->second)) {
+            prev_vma->second.size += iter->second.size;
+            vma_map.erase(iter);
+            iter = prev_vma;
         }
     }
-    return results;
+
+    return iter;
 }
 
-VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
-    auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
-    if (!block) {
-        block = std::make_unique<PageBlock>();
-        block->fill(static_cast<VAddr>(PageStatus::Unmapped));
+void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
+    switch (vma.type) {
+    case VirtualMemoryArea::Type::Unmapped:
+        UnmapRegion(vma.base, vma.size);
+        break;
+    case VirtualMemoryArea::Type::Allocated:
+        MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
+        break;
+    case VirtualMemoryArea::Type::Mapped:
+        MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
+        break;
     }
-    return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
 }
 
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..647cbf93a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,67 +1,154 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
-#include <array>
-#include <memory>
+#include <map>
 #include <optional>
-#include <vector>
 
 #include "common/common_types.h"
+#include "common/page_table.h"
+
+namespace VideoCore {
+class RasterizerInterface;
+}
 
 namespace Tegra {
 
-/// Virtual addresses in the GPU's memory map are 64 bit.
-using GPUVAddr = u64;
+/**
+ * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
+ * with homogeneous attributes across its extents. In this particular implementation each VMA is
+ * also backed by a single host memory allocation.
+ */
+struct VirtualMemoryArea {
+    enum class Type : u8 {
+        Unmapped,
+        Allocated,
+        Mapped,
+    };
+
+    /// Virtual base address of the region.
+    GPUVAddr base{};
+    /// Size of the region.
+    u64 size{};
+    /// Memory area mapping type.
+    Type type{Type::Unmapped};
+    /// CPU memory mapped address corresponding to this memory area.
+    VAddr backing_addr{};
+    /// Offset into the backing_memory the mapping starts from.
+    std::size_t offset{};
+    /// Pointer backing this VMA.
+    u8* backing_memory{};
+
+    /// Tests if this area can be merged to the right with `next`.
+    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+};
 
 class MemoryManager final {
 public:
-    MemoryManager();
+    MemoryManager(VideoCore::RasterizerInterface& rasterizer);
 
     GPUVAddr AllocateSpace(u64 size, u64 align);
-    GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
     GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
-    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
-    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
-    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
-    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
-    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
+    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
+    GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
+    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+
+    template <typename T>
+    T Read(GPUVAddr addr) const;
+
+    template <typename T>
+    void Write(GPUVAddr addr, T data);
 
-    static constexpr u64 PAGE_BITS = 16;
-    static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
-    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
+    u8* GetPointer(GPUVAddr addr);
+    const u8* GetPointer(GPUVAddr addr) const;
+
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
 private:
-    enum class PageStatus : u64 {
-        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
-        Allocated = 0xFFFFFFFFFFFFFFFEULL,
-        Reserved = 0xFFFFFFFFFFFFFFFDULL,
-    };
+    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
+    using VMAHandle = VMAMap::const_iterator;
+    using VMAIter = VMAMap::iterator;
 
-    std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                          PageStatus status);
-    VAddr& PageSlot(GPUVAddr gpu_addr);
-
-    static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
-    static constexpr u64 PAGE_TABLE_BITS{10};
-    static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
-    static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
-    static constexpr u64 PAGE_BLOCK_BITS{14};
-    static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
-    static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
-
-    using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
-    std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
-
-    struct MappedRegion {
-        VAddr cpu_addr;
-        GPUVAddr gpu_addr;
-        u64 size;
-    };
+    bool IsAddressValid(GPUVAddr addr) const;
+    void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+                  VAddr backing_addr = 0);
+    void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
+    void UnmapRegion(GPUVAddr base, u64 size);
+
+    /// Finds the VMA that contains the given address, or `vma_map.end()`.
+    VMAHandle FindVMA(GPUVAddr target) const;
+
+    VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
+
+    /**
+     * Maps an unmanaged host memory pointer at a given address.
+     *
+     * @param target The guest address to start the mapping at.
+     * @param memory The memory to be mapped.
+     * @param size Size of the mapping.
+     * @param backing_addr CPU virtual address recorded as backing this mapping.
+     */
+    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+
+    /// Unmaps a range of addresses, splitting VMAs as necessary.
+    void UnmapRange(GPUVAddr target, u64 size);
+
+    /// Converts a VMAHandle to a mutable VMAIter.
+    VMAIter StripIterConstness(const VMAHandle& iter);
+
+    /// Marks the specified VMA as allocated.
+    VMAIter Allocate(VMAIter vma);
+
+    /**
+     * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
+     * the appropriate error checking.
+     */
+    VMAIter CarveVMA(GPUVAddr base, u64 size);
+
+    /**
+     * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
+     * end of the range.
+     */
+    VMAIter CarveVMARange(GPUVAddr base, u64 size);
+
+    /**
+     * Splits a VMA in two, at the specified offset.
+     * @returns the right side of the split, with the original iterator becoming the left side.
+     */
+    VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
+
+    /**
+     * Checks for and merges the specified VMA with adjacent ones if possible.
+     * @returns the merged VMA or the original if no merging was possible.
+     */
+    VMAIter MergeAdjacent(VMAIter vma);
+
+    /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
+    void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+
+    /// Finds a free (unmapped) region of the specified size at or after the specified address.
+    GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
+
+private:
+    static constexpr u64 page_bits{16};
+    static constexpr u64 page_size{1 << page_bits};
+    static constexpr u64 page_mask{page_size - 1};
+
+    /// Address space width in bits; this is fairly arbitrary but sufficiently large.
+    static constexpr u32 address_space_width{39};
+    /// Start address for mapping; this is fairly arbitrary but must be non-zero.
+    static constexpr GPUVAddr address_space_base{0x100000};
+    /// End of address space, based on address space in bits.
+    static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
 
-    std::vector<MappedRegion> mapped_regions;
+    Common::PageTable page_table{page_bits};
+    VMAMap vma_map;
+    VideoCore::RasterizerInterface& rasterizer;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index b68f4fb13..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "core/memory.h"
 #include "video_core/morton.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
 using Surface::GetBytesPerPixel;
 using Surface::PixelFormat;
 
-using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
+using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
 using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
 
 template <bool morton_to_linear, PixelFormat format>
 static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
-                       u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) {
+                       u32 tile_width_spacing, u8* buffer, u8* addr) {
     constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
 
     // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
                                          stride, height, depth, block_height, block_depth,
                                          tile_width_spacing);
     } else {
-        Tegra::Texture::CopySwizzledData(
-            (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
-            depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
-            block_height, block_depth, tile_width_spacing);
+        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+                                         (height + tile_size_y - 1) / tile_size_y, depth,
+                                         bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
+                                         block_height, block_depth, tile_width_spacing);
     }
 }
 
 static constexpr ConversionArray morton_to_linear_fns = {
-    // clang-format off
-        MortonCopy<true, PixelFormat::ABGR8U>,
-        MortonCopy<true, PixelFormat::ABGR8S>,
-        MortonCopy<true, PixelFormat::ABGR8UI>,
-        MortonCopy<true, PixelFormat::B5G6R5U>,
-        MortonCopy<true, PixelFormat::A2B10G10R10U>,
-        MortonCopy<true, PixelFormat::A1B5G5R5U>,
-        MortonCopy<true, PixelFormat::R8U>,
-        MortonCopy<true, PixelFormat::R8UI>,
-        MortonCopy<true, PixelFormat::RGBA16F>,
-        MortonCopy<true, PixelFormat::RGBA16U>,
-        MortonCopy<true, PixelFormat::RGBA16UI>,
-        MortonCopy<true, PixelFormat::R11FG11FB10F>,
-        MortonCopy<true, PixelFormat::RGBA32UI>,
-        MortonCopy<true, PixelFormat::DXT1>,
-        MortonCopy<true, PixelFormat::DXT23>,
-        MortonCopy<true, PixelFormat::DXT45>,
-        MortonCopy<true, PixelFormat::DXN1>,
-        MortonCopy<true, PixelFormat::DXN2UNORM>,
-        MortonCopy<true, PixelFormat::DXN2SNORM>,
-        MortonCopy<true, PixelFormat::BC7U>,
-        MortonCopy<true, PixelFormat::BC6H_UF16>,
-        MortonCopy<true, PixelFormat::BC6H_SF16>,
-        MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
-        MortonCopy<true, PixelFormat::BGRA8>,
-        MortonCopy<true, PixelFormat::RGBA32F>,
-        MortonCopy<true, PixelFormat::RG32F>,
-        MortonCopy<true, PixelFormat::R32F>,
-        MortonCopy<true, PixelFormat::R16F>,
-        MortonCopy<true, PixelFormat::R16U>,
-        MortonCopy<true, PixelFormat::R16S>,
-        MortonCopy<true, PixelFormat::R16UI>,
-        MortonCopy<true, PixelFormat::R16I>,
-        MortonCopy<true, PixelFormat::RG16>,
-        MortonCopy<true, PixelFormat::RG16F>,
-        MortonCopy<true, PixelFormat::RG16UI>,
-        MortonCopy<true, PixelFormat::RG16I>,
-        MortonCopy<true, PixelFormat::RG16S>,
-        MortonCopy<true, PixelFormat::RGB32F>,
-        MortonCopy<true, PixelFormat::RGBA8_SRGB>,
-        MortonCopy<true, PixelFormat::RG8U>,
-        MortonCopy<true, PixelFormat::RG8S>,
-        MortonCopy<true, PixelFormat::RG32UI>,
-        MortonCopy<true, PixelFormat::R32UI>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
-        MortonCopy<true, PixelFormat::BGRA8_SRGB>,
-        MortonCopy<true, PixelFormat::DXT1_SRGB>,
-        MortonCopy<true, PixelFormat::DXT23_SRGB>,
-        MortonCopy<true, PixelFormat::DXT45_SRGB>,
-        MortonCopy<true, PixelFormat::BC7U_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
-        MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
-        MortonCopy<true, PixelFormat::Z32F>,
-        MortonCopy<true, PixelFormat::Z16>,
-        MortonCopy<true, PixelFormat::Z24S8>,
-        MortonCopy<true, PixelFormat::S8Z24>,
-        MortonCopy<true, PixelFormat::Z32FS8>,
-    // clang-format on
+    MortonCopy<true, PixelFormat::ABGR8U>,
+    MortonCopy<true, PixelFormat::ABGR8S>,
+    MortonCopy<true, PixelFormat::ABGR8UI>,
+    MortonCopy<true, PixelFormat::B5G6R5U>,
+    MortonCopy<true, PixelFormat::A2B10G10R10U>,
+    MortonCopy<true, PixelFormat::A1B5G5R5U>,
+    MortonCopy<true, PixelFormat::R8U>,
+    MortonCopy<true, PixelFormat::R8UI>,
+    MortonCopy<true, PixelFormat::RGBA16F>,
+    MortonCopy<true, PixelFormat::RGBA16U>,
+    MortonCopy<true, PixelFormat::RGBA16UI>,
+    MortonCopy<true, PixelFormat::R11FG11FB10F>,
+    MortonCopy<true, PixelFormat::RGBA32UI>,
+    MortonCopy<true, PixelFormat::DXT1>,
+    MortonCopy<true, PixelFormat::DXT23>,
+    MortonCopy<true, PixelFormat::DXT45>,
+    MortonCopy<true, PixelFormat::DXN1>,
+    MortonCopy<true, PixelFormat::DXN2UNORM>,
+    MortonCopy<true, PixelFormat::DXN2SNORM>,
+    MortonCopy<true, PixelFormat::BC7U>,
+    MortonCopy<true, PixelFormat::BC6H_UF16>,
+    MortonCopy<true, PixelFormat::BC6H_SF16>,
+    MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+    MortonCopy<true, PixelFormat::BGRA8>,
+    MortonCopy<true, PixelFormat::RGBA32F>,
+    MortonCopy<true, PixelFormat::RG32F>,
+    MortonCopy<true, PixelFormat::R32F>,
+    MortonCopy<true, PixelFormat::R16F>,
+    MortonCopy<true, PixelFormat::R16U>,
+    MortonCopy<true, PixelFormat::R16S>,
+    MortonCopy<true, PixelFormat::R16UI>,
+    MortonCopy<true, PixelFormat::R16I>,
+    MortonCopy<true, PixelFormat::RG16>,
+    MortonCopy<true, PixelFormat::RG16F>,
+    MortonCopy<true, PixelFormat::RG16UI>,
+    MortonCopy<true, PixelFormat::RG16I>,
+    MortonCopy<true, PixelFormat::RG16S>,
+    MortonCopy<true, PixelFormat::RGB32F>,
+    MortonCopy<true, PixelFormat::RGBA8_SRGB>,
+    MortonCopy<true, PixelFormat::RG8U>,
+    MortonCopy<true, PixelFormat::RG8S>,
+    MortonCopy<true, PixelFormat::RG32UI>,
+    MortonCopy<true, PixelFormat::R32UI>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
+    MortonCopy<true, PixelFormat::BGRA8_SRGB>,
+    MortonCopy<true, PixelFormat::DXT1_SRGB>,
+    MortonCopy<true, PixelFormat::DXT23_SRGB>,
+    MortonCopy<true, PixelFormat::DXT45_SRGB>,
+    MortonCopy<true, PixelFormat::BC7U_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
+    MortonCopy<true, PixelFormat::Z32F>,
+    MortonCopy<true, PixelFormat::Z16>,
+    MortonCopy<true, PixelFormat::Z24S8>,
+    MortonCopy<true, PixelFormat::S8Z24>,
+    MortonCopy<true, PixelFormat::Z32FS8>,
 };
 
 static constexpr ConversionArray linear_to_morton_fns = {
-    // clang-format off
-        MortonCopy<false, PixelFormat::ABGR8U>,
-        MortonCopy<false, PixelFormat::ABGR8S>,
-        MortonCopy<false, PixelFormat::ABGR8UI>,
-        MortonCopy<false, PixelFormat::B5G6R5U>,
-        MortonCopy<false, PixelFormat::A2B10G10R10U>,
-        MortonCopy<false, PixelFormat::A1B5G5R5U>,
-        MortonCopy<false, PixelFormat::R8U>,
-        MortonCopy<false, PixelFormat::R8UI>,
-        MortonCopy<false, PixelFormat::RGBA16F>,
-        MortonCopy<false, PixelFormat::RGBA16U>,
-        MortonCopy<false, PixelFormat::RGBA16UI>,
-        MortonCopy<false, PixelFormat::R11FG11FB10F>,
-        MortonCopy<false, PixelFormat::RGBA32UI>,
-        MortonCopy<false, PixelFormat::DXT1>,
-        MortonCopy<false, PixelFormat::DXT23>,
-        MortonCopy<false, PixelFormat::DXT45>,
-        MortonCopy<false, PixelFormat::DXN1>,
-        MortonCopy<false, PixelFormat::DXN2UNORM>,
-        MortonCopy<false, PixelFormat::DXN2SNORM>,
-        MortonCopy<false, PixelFormat::BC7U>,
-        MortonCopy<false, PixelFormat::BC6H_UF16>,
-        MortonCopy<false, PixelFormat::BC6H_SF16>,
-        // TODO(Subv): Swizzling ASTC formats are not supported
-        nullptr,
-        MortonCopy<false, PixelFormat::BGRA8>,
-        MortonCopy<false, PixelFormat::RGBA32F>,
-        MortonCopy<false, PixelFormat::RG32F>,
-        MortonCopy<false, PixelFormat::R32F>,
-        MortonCopy<false, PixelFormat::R16F>,
-        MortonCopy<false, PixelFormat::R16U>,
-        MortonCopy<false, PixelFormat::R16S>,
-        MortonCopy<false, PixelFormat::R16UI>,
-        MortonCopy<false, PixelFormat::R16I>,
-        MortonCopy<false, PixelFormat::RG16>,
-        MortonCopy<false, PixelFormat::RG16F>,
-        MortonCopy<false, PixelFormat::RG16UI>,
-        MortonCopy<false, PixelFormat::RG16I>,
-        MortonCopy<false, PixelFormat::RG16S>,
-        MortonCopy<false, PixelFormat::RGB32F>,
-        MortonCopy<false, PixelFormat::RGBA8_SRGB>,
-        MortonCopy<false, PixelFormat::RG8U>,
-        MortonCopy<false, PixelFormat::RG8S>,
-        MortonCopy<false, PixelFormat::RG32UI>,
-        MortonCopy<false, PixelFormat::R32UI>,
-        nullptr,
-        nullptr,
-        nullptr,
-        MortonCopy<false, PixelFormat::BGRA8_SRGB>,
-        MortonCopy<false, PixelFormat::DXT1_SRGB>,
-        MortonCopy<false, PixelFormat::DXT23_SRGB>,
-        MortonCopy<false, PixelFormat::DXT45_SRGB>,
-        MortonCopy<false, PixelFormat::BC7U_SRGB>,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        MortonCopy<false, PixelFormat::Z32F>,
-        MortonCopy<false, PixelFormat::Z16>,
-        MortonCopy<false, PixelFormat::Z24S8>,
-        MortonCopy<false, PixelFormat::S8Z24>,
-        MortonCopy<false, PixelFormat::Z32FS8>,
-    // clang-format on
+    MortonCopy<false, PixelFormat::ABGR8U>,
+    MortonCopy<false, PixelFormat::ABGR8S>,
+    MortonCopy<false, PixelFormat::ABGR8UI>,
+    MortonCopy<false, PixelFormat::B5G6R5U>,
+    MortonCopy<false, PixelFormat::A2B10G10R10U>,
+    MortonCopy<false, PixelFormat::A1B5G5R5U>,
+    MortonCopy<false, PixelFormat::R8U>,
+    MortonCopy<false, PixelFormat::R8UI>,
+    MortonCopy<false, PixelFormat::RGBA16F>,
+    MortonCopy<false, PixelFormat::RGBA16U>,
+    MortonCopy<false, PixelFormat::RGBA16UI>,
+    MortonCopy<false, PixelFormat::R11FG11FB10F>,
+    MortonCopy<false, PixelFormat::RGBA32UI>,
+    MortonCopy<false, PixelFormat::DXT1>,
+    MortonCopy<false, PixelFormat::DXT23>,
+    MortonCopy<false, PixelFormat::DXT45>,
+    MortonCopy<false, PixelFormat::DXN1>,
+    MortonCopy<false, PixelFormat::DXN2UNORM>,
+    MortonCopy<false, PixelFormat::DXN2SNORM>,
+    MortonCopy<false, PixelFormat::BC7U>,
+    MortonCopy<false, PixelFormat::BC6H_UF16>,
+    MortonCopy<false, PixelFormat::BC6H_SF16>,
+    // TODO(Subv): Swizzling ASTC formats is not supported
+    nullptr,
+    MortonCopy<false, PixelFormat::BGRA8>,
+    MortonCopy<false, PixelFormat::RGBA32F>,
+    MortonCopy<false, PixelFormat::RG32F>,
+    MortonCopy<false, PixelFormat::R32F>,
+    MortonCopy<false, PixelFormat::R16F>,
+    MortonCopy<false, PixelFormat::R16U>,
+    MortonCopy<false, PixelFormat::R16S>,
+    MortonCopy<false, PixelFormat::R16UI>,
+    MortonCopy<false, PixelFormat::R16I>,
+    MortonCopy<false, PixelFormat::RG16>,
+    MortonCopy<false, PixelFormat::RG16F>,
+    MortonCopy<false, PixelFormat::RG16UI>,
+    MortonCopy<false, PixelFormat::RG16I>,
+    MortonCopy<false, PixelFormat::RG16S>,
+    MortonCopy<false, PixelFormat::RGB32F>,
+    MortonCopy<false, PixelFormat::RGBA8_SRGB>,
+    MortonCopy<false, PixelFormat::RG8U>,
+    MortonCopy<false, PixelFormat::RG8S>,
+    MortonCopy<false, PixelFormat::RG32UI>,
+    MortonCopy<false, PixelFormat::R32UI>,
+    nullptr,
+    nullptr,
+    nullptr,
+    MortonCopy<false, PixelFormat::BGRA8_SRGB>,
+    MortonCopy<false, PixelFormat::DXT1_SRGB>,
+    MortonCopy<false, PixelFormat::DXT23_SRGB>,
+    MortonCopy<false, PixelFormat::DXT45_SRGB>,
+    MortonCopy<false, PixelFormat::BC7U_SRGB>,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    MortonCopy<false, PixelFormat::Z32F>,
+    MortonCopy<false, PixelFormat::Z16>,
+    MortonCopy<false, PixelFormat::Z24S8>,
+    MortonCopy<false, PixelFormat::S8Z24>,
+    MortonCopy<false, PixelFormat::Z32FS8>,
 };
 
 static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
     return morton_to_linear_fns[static_cast<std::size_t>(format)];
 }
 
-/// 8x8 Z-Order coordinate from 2D coordinates
-static u32 MortonInterleave(u32 x, u32 y) {
-    static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
-    static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
-    return xlut[x % 8] + ylut[y % 8];
-}
-
-/// Calculates the offset of the position of the pixel in Morton order
-static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
-    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
-    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
-    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
-    // texels are laid out in a 2x2 subtile like this:
-    // 2 3
-    // 0 1
-    //
-    // The full 8x8 tile has the texels arranged like this:
-    //
-    // 42 43 46 47 58 59 62 63
-    // 40 41 44 45 56 57 60 61
-    // 34 35 38 39 50 51 54 55
-    // 32 33 36 37 48 49 52 53
-    // 10 11 14 15 26 27 30 31
-    // 08 09 12 13 24 25 28 29
-    // 02 03 06 07 18 19 22 23
-    // 00 01 04 05 16 17 20 21
-    //
-    // This pattern is what's called Z-order curve, or Morton order.
-
-    const unsigned int block_height = 8;
-    const unsigned int coarse_x = x & ~7;
-
-    u32 i = MortonInterleave(x, y);
-
-    const unsigned int offset = coarse_x * block_height;
-
-    return (i + offset) * bytes_per_pixel;
-}
-
 static u32 MortonInterleave128(u32 x, u32 y) {
     // 128x128 Z-Order coordinate from 2D coordinates
     static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
 
 void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, std::size_t buffer_size, VAddr addr) {
-
+                   u8* buffer, u8* addr) {
     GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
-                                     tile_width_spacing, buffer, buffer_size, addr);
+                                     tile_width_spacing, buffer, addr);
 }
 
-void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
-                         u8* morton_data, u8* linear_data, bool morton_to_linear) {
+void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
+                         u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
+    const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
     u8* data_ptrs[2];
     for (u32 y = 0; y < height; ++y) {
         for (u32 x = 0; x < width; ++x) {
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index 065f59ce3..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
 
 void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, std::size_t buffer_size, VAddr addr);
+                   u8* buffer, u8* addr);
 
-void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
-                         u8* morton_data, u8* linear_data, bool morton_to_linear);
+void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
+                         u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
 
 } // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..291772186 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <mutex>
 #include <set>
 #include <unordered_map>
 
@@ -12,14 +13,26 @@
 
 #include "common/common_types.h"
 #include "core/settings.h"
+#include "video_core/gpu.h"
 #include "video_core/rasterizer_interface.h"
 
 class RasterizerCacheObject {
 public:
+    explicit RasterizerCacheObject(const u8* host_ptr)
+        : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
+
     virtual ~RasterizerCacheObject();
 
+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
+    }
+
+    const u8* GetHostPtr() const {
+        return host_ptr;
+    }
+
     /// Gets the address of the shader in guest memory, required for cache management
-    virtual VAddr GetAddr() const = 0;
+    virtual VAddr GetCpuAddr() const = 0;
 
     /// Gets the size of the shader in guest memory, required for cache management
     virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
     bool is_registered{};      ///< Whether the object is currently registered with the cache
     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
+    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
+    CacheAddr cache_addr{};    ///< Cache address, distinct from the emulated virtual address space
 };
 
 template <class T>
@@ -68,7 +83,9 @@ public:
     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
         for (auto& object : objects) {
             FlushObject(object);
@@ -76,7 +93,9 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(VAddr addr, u64 size) {
+    void InvalidateRegion(CacheAddr addr, u64 size) {
+        std::lock_guard lock{mutex};
+
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
         for (auto& object : objects) {
             if (!object->IsRegistered()) {
@@ -89,49 +108,70 @@ public:
 
     /// Invalidates everything in the cache
     void InvalidateAll() {
+        std::lock_guard lock{mutex};
+
         while (interval_cache.begin() != interval_cache.end()) {
             Unregister(*interval_cache.begin()->second.begin());
         }
     }
 
 protected:
-    /// Tries to get an object from the cache with the specified address
-    T TryGet(VAddr addr) const {
+    /// Tries to get an object from the cache with the specified cache address
+    T TryGet(CacheAddr addr) const {
         const auto iter = map_cache.find(addr);
         if (iter != map_cache.end())
             return iter->second;
         return nullptr;
     }
 
+    T TryGet(const void* addr) const {
+        const auto iter = map_cache.find(ToCacheAddr(addr));
+        if (iter != map_cache.end())
+            return iter->second;
+        return nullptr;
+    }
+
     /// Register an object into the cache
-    void Register(const T& object) {
+    virtual void Register(const T& object) {
+        std::lock_guard lock{mutex};
+
         object->SetIsRegistered(true);
         interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetAddr(), object});
-        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
+        map_cache.insert({object->GetCacheAddr(), object});
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
     }
 
     /// Unregisters an object from the cache
-    void Unregister(const T& object) {
-        object->SetIsRegistered(false);
-        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        if (Settings::values.use_accurate_gpu_emulation) {
-            FlushObject(object);
-        }
+    virtual void Unregister(const T& object) {
+        std::lock_guard lock{mutex};
 
+        object->SetIsRegistered(false);
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
         interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(object->GetAddr());
+        map_cache.erase(object->GetCacheAddr());
     }
 
     /// Returns a ticks counter used for tracking when cached objects were last modified
     u64 GetModifiedTicks() {
+        std::lock_guard lock{mutex};
+
         return ++modified_ticks;
     }
 
+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        std::lock_guard lock{mutex};
+
+        if (!object->IsDirty()) {
+            return;
+        }
+        object->Flush();
+        object->MarkAsModified(false, *this);
+    }
+
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
+    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
         if (size == 0) {
             return {};
         }
@@ -154,27 +194,19 @@ private:
         return objects;
     }
 
-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        if (!object->IsDirty()) {
-            return;
-        }
-        object->Flush();
-        object->MarkAsModified(false, *this);
-    }
-
     using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<VAddr, T>;
-    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+    using ObjectCache = std::unordered_map<CacheAddr, T>;
+    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
     using ObjectInterval = typename IntervalCache::interval_type;
 
     static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetAddr(),
-                                          object->GetAddr() + object->GetSizeInBytes());
+        return ObjectInterval::right_open(object->GetCacheAddr(),
+                                          object->GetCacheAddr() + object->GetSizeInBytes());
     }
 
     ObjectCache map_cache;
     IntervalCache interval_cache; ///< Cache of objects
     u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
     VideoCore::RasterizerInterface& rasterizer;
+    std::recursive_mutex mutex;
 };
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index ff5310848..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,11 +4,11 @@
 
 #pragma once
 
+#include <atomic>
 #include <functional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace VideoCore {
 
@@ -34,23 +34,20 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 
     /// Attempt to use a faster method to perform a surface copy
     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
-        return false;
-    }
-
-    /// Attempt to use a faster method to fill a region
-    virtual bool AccelerateFill(const void* config) {
+                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                                       const Common::Rectangle<u32>& src_rect,
+                                       const Common::Rectangle<u32>& dst_rect) {
         return false;
     }
 
@@ -65,6 +62,10 @@ public:
     }
 
     /// Increase/decrease the number of object in pages touching the specified region
-    virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
+    virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
+
+    /// Initialize disk cached resources for the game being emulated
+    virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
+                                   const DiskResourceLoadCallback& callback = {}) {}
 };
 } // namespace VideoCore
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
 #include "core/frontend/emu_window.h"
 #include "core/settings.h"
 #include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index bd2b30e77..25652e794 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,29 +7,34 @@
 
 #include "common/alignment.h"
 #include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 
 namespace OpenGL {
 
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+                                     std::size_t alignment, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {}
+
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
     : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
 
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
-                                      std::size_t alignment, bool cache) {
+GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
+                                      bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
 
     // Cache management is a big overhead, so only cache entries with a given size.
     // TODO: Figure out which size is the best for given games.
     cache &= size >= 2048;
 
+    const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
     if (cache) {
-        auto entry = TryGet(*cpu_addr);
+        auto entry = TryGet(host_ptr);
         if (entry) {
-            if (entry->size >= size && entry->alignment == alignment) {
-                return entry->offset;
+            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+                return entry->GetOffset();
             }
             Unregister(entry);
         }
@@ -38,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
     AlignBuffer(alignment);
     const GLintptr uploaded_offset = buffer_offset;
 
-    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+    if (!host_ptr) {
+        return uploaded_offset;
+    }
 
+    std::memcpy(buffer_ptr, host_ptr, size);
     buffer_ptr += size;
     buffer_offset += size;
 
     if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>();
-        entry->offset = uploaded_offset;
-        entry->size = size;
-        entry->alignment = alignment;
-        entry->addr = *cpu_addr;
+        auto entry = std::make_shared<CachedBufferEntry>(
+            *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
         Register(entry);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
 
 class RasterizerOpenGL;
 
-struct CachedBufferEntry final : public RasterizerCacheObject {
-    VAddr GetAddr() const override {
-        return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+                               std::size_t alignment, u8* host_ptr);
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
         return size;
     }
 
+    std::size_t GetSize() const {
+        return size;
+    }
+
+    GLintptr GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetAlignment() const {
+        return alignment;
+    }
+
     // We do not have to flush this cache as things in it are never modified by us.
     void Flush() override {}
 
-    VAddr addr;
-    std::size_t size;
-    GLintptr offset;
-    std::size_t alignment;
+private:
+    VAddr cpu_addr{};
+    std::size_t size{};
+    GLintptr offset{};
+    std::size_t alignment{};
 };
 
 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
 
     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
     /// allocated.
-    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+    GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                           bool cache = true);
 
     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7992b82c4..8d9ee81f1 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,21 +4,92 @@
 
 #include <glad/glad.h>
 
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/utils.h"
 
 namespace OpenGL {
 
-CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
     buffer.Create();
     // Bind and unbind the buffer so it gets allocated by the driver
     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
     glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
-    LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
+    LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
+}
+
+void CachedGlobalRegion::Reload(u32 size_) {
+    constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+
+    size = size_;
+    if (size > max_size) {
+        size = max_size;
+        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+                     max_size);
+    }
+
+    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
+    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
+    glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
+}
+
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
+    const auto search{reserve.find(addr)};
+    if (search == reserve.end()) {
+        return {};
+    }
+    return search->second;
+}
+
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
+                                                              u8* host_ptr) {
+    GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
+    if (!region) {
+        // No reserved global region available, create a new one and reserve it
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+        const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
+        region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
+        ReserveGlobalRegion(region);
+    }
+    region->Reload(size);
+    return region;
+}
+
+void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
+    reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
 }
 
 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
     : RasterizerCache{rasterizer} {}
 
+GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
+    const GLShader::GlobalMemoryEntry& global_region,
+    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
+
+    auto& gpu{Core::System::GetInstance().GPU()};
+    auto& memory_manager{gpu.MemoryManager()};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
+                    global_region.GetCbufOffset()};
+    const auto actual_addr{memory_manager.Read<u64>(addr)};
+    const auto size{memory_manager.Read<u32>(addr + 8)};
+
+    // Look up global region in the cache based on address
+    const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
+    GlobalRegion region{TryGet(host_ptr)};
+
+    if (!region) {
+        // No global region found - create a new one
+        region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
+        Register(region);
+    }
+
+    return region;
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 406a735bc..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,9 +5,13 @@
 #pragma once
 
 #include <memory>
+#include <unordered_map>
+
 #include <glad/glad.h>
 
+#include "common/assert.h"
 #include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
@@ -23,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
 
 class CachedGlobalRegion final : public RasterizerCacheObject {
 public:
-    explicit CachedGlobalRegion(VAddr addr, u32 size);
+    explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
 
-    /// Gets the address of the shader in guest memory, required for cache management
-    VAddr GetAddr() const {
-        return addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
-    /// Gets the size of the shader in guest memory, required for cache management
-    std::size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const override {
         return size;
     }
 
@@ -40,21 +42,34 @@ public:
         return buffer.handle;
     }
 
+    /// Reloads the global region from guest memory
+    void Reload(u32 size_);
+
     // TODO(Rodrigo): When global memory is written (STG), implement flushing
     void Flush() override {
         UNIMPLEMENTED();
     }
 
 private:
-    VAddr addr{};
+    VAddr cpu_addr{};
     u32 size{};
-
     OGLBuffer buffer;
 };
 
 class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
 public:
     explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
+
+    /// Gets the global memory region for the given descriptor and shader stage
+    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
+                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
+
+private:
+    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
+    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
+    void ReserveGlobalRegion(GlobalRegion region);
+
+    std::unordered_map<CacheAddr, GlobalRegion> reserve;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index d9ed08437..c3e94d917 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -7,7 +7,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_primitive_assembler.h"
 
@@ -40,14 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
     return index_offset;
 }
 
-GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
-                                             u32 count) {
+GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
     const std::size_t map_size{CalculateQuadSize(count)};
     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
 
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    const u8* source{Memory::GetPointer(*cpu_addr)};
+    const u8* source{memory_manager.GetPointer(gpu_addr)};
 
     for (u32 primitive = 0; primitive < count / 4; ++primitive) {
         for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -62,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
     return index_offset;
 }
 
-} // namespace OpenGL
-\ No newline at end of file
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..4e87ce4d6 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -4,11 +4,9 @@
 
 #pragma once
 
-#include <vector>
 #include <glad/glad.h>
 
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace OpenGL {
 
@@ -24,7 +22,7 @@ public:
 
     GLintptr MakeQuadArray(u32 first, u32 count);
 
-    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+    GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
 
 private:
     OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2bf086902..d250d5cbb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -17,15 +17,14 @@
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
-#include "core/frontend/emu_window.h"
 #include "core/hle/kernel/process.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
-#include "video_core/video_core.h"
 
 namespace OpenGL {
 
@@ -99,9 +98,9 @@ struct FramebufferCacheKey {
     }
 };
 
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -116,7 +115,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
 
     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
 
-    LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
+    LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
     CheckExtensions();
 }
 
@@ -136,7 +135,7 @@ void RasterizerOpenGL::CheckExtensions() {
 }
 
 GLuint RasterizerOpenGL::SetupVertexFormat() {
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
     if (!gpu.dirty_flags.vertex_attrib_format) {
@@ -175,7 +174,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
                 continue;
 
             const auto& buffer = regs.vertex_array[attrib.buffer];
-            LOG_TRACE(HW_GPU,
+            LOG_TRACE(Render_OpenGL,
                       "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
                       index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
                       attrib.offset.Value(), attrib.IsNormalized());
@@ -198,32 +197,32 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+    gpu.dirty_flags.vertex_array.set();
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
 }
 
 void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    if (!gpu.dirty_flags.vertex_array)
+    if (gpu.dirty_flags.vertex_array.none())
         return;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (~gpu.dirty_flags.vertex_array & (1u << index))
+        if (!gpu.dirty_flags.vertex_array[index])
             continue;
 
         const auto& vertex_array = regs.vertex_array[index];
         if (!vertex_array.IsEnabled())
             continue;
 
-        const Tegra::GPUVAddr start = vertex_array.StartAddress();
-        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+        const GPUVAddr start = vertex_array.StartAddress();
+        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
 
         ASSERT(end > start);
         const u64 size = end - start + 1;
@@ -242,11 +241,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
         }
     }
 
-    gpu.dirty_flags.vertex_array = 0;
+    gpu.dirty_flags.vertex_array.reset();
 }
 
 DrawParameters RasterizerOpenGL::SetupDraw() {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
 
@@ -295,14 +294,15 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
 
 void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     MICROPROFILE_SCOPE(OpenGL_Shader);
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = system.GPU().Maxwell3D();
 
-    // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
-    // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
-    u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
-    u32 current_texture_bindpoint = 0;
+    BaseBindings base_bindings;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 
+    // Prepare packed bindings
+    bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+    bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
+
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -320,47 +320,38 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
 
         GLShader::MaxwellUniformData ubo{};
-        ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+        ubo.SetFromRegs(gpu, stage);
         const GLintptr offset = buffer_cache.UploadHostMemory(
             &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
 
-        // Bind the buffer
-        glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(),
-                          offset, static_cast<GLsizeiptr>(sizeof(ubo)));
+        // Bind the emulation info buffer
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
+                                 static_cast<GLsizeiptr>(sizeof(ubo)));
 
         Shader shader{shader_cache.GetStageProgram(program)};
+        const auto [program_handle, next_bindings] =
+            shader->GetProgramHandle(primitive_mode, base_bindings);
 
         switch (program) {
         case Maxwell::ShaderProgram::VertexA:
-        case Maxwell::ShaderProgram::VertexB: {
-            shader_program_manager->UseProgrammableVertexShader(
-                shader->GetProgramHandle(primitive_mode));
+        case Maxwell::ShaderProgram::VertexB:
+            shader_program_manager->UseProgrammableVertexShader(program_handle);
             break;
-        }
-        case Maxwell::ShaderProgram::Geometry: {
-            shader_program_manager->UseProgrammableGeometryShader(
-                shader->GetProgramHandle(primitive_mode));
+        case Maxwell::ShaderProgram::Geometry:
+            shader_program_manager->UseProgrammableGeometryShader(program_handle);
             break;
-        }
-        case Maxwell::ShaderProgram::Fragment: {
-            shader_program_manager->UseProgrammableFragmentShader(
-                shader->GetProgramHandle(primitive_mode));
+        case Maxwell::ShaderProgram::Fragment:
+            shader_program_manager->UseProgrammableFragmentShader(program_handle);
             break;
-        }
         default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
-                         shader_config.enable.Value(), shader_config.offset);
-            UNREACHABLE();
+            UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
+                              shader_config.enable.Value(), shader_config.offset);
         }
 
-        // Configure the const buffers for this shader stage.
-        current_constbuffer_bindpoint =
-            SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
-                              current_constbuffer_bindpoint);
-
-        // Configure the textures for this shader stage.
-        current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
-                                                  primitive_mode, current_texture_bindpoint);
+        const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
+        SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
+        SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
+        SetupTextures(stage_enum, shader, program_handle, base_bindings);
 
         // Workaround for Intel drivers.
         // When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -375,8 +366,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             // VertexB was combined with VertexA, so we skip the VertexB iteration
             index++;
         }
+
+        base_bindings = next_bindings;
     }
 
+    bind_ubo_pushbuffer.Bind();
+    bind_ssbo_pushbuffer.Bind();
+
     SyncClipEnabled(clip_distances);
 
     gpu.dirty_flags.shaders = false;
@@ -421,15 +417,15 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     std::size_t size = 0;
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
         if (!regs.vertex_array[index].IsEnabled())
             continue;
 
-        const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
-        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+        const GPUVAddr start = regs.vertex_array[index].StartAddress();
+        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
 
         ASSERT(end > start);
         size += end - start + 1;
@@ -439,7 +435,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
 }
 
 std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     return static_cast<std::size_t>(regs.index_array.count) *
            static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
@@ -486,21 +482,26 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
         cached_pages.add({pages_interval, delta});
 }
 
-void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
-                                             bool using_depth_fb, bool preserve_contents,
-                                             std::optional<std::size_t> single_color_target) {
+void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+                                         const VideoCore::DiskResourceLoadCallback& callback) {
+    shader_cache.LoadDiskCache(stop_loading, callback);
+}
+
+std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
+    OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
+    std::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
-        !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state &&
+        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
-        return;
+        return current_depth_stencil_usage;
     }
     current_framebuffer_config_state = fb_config_state;
 
@@ -509,10 +510,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
         depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
     }
 
-    // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
-    // used to enable multiple render targets. However, it is left unset on all games that I have
-    // tested.
-    UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
+    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
 
     // Bind the framebuffer surfaces
     current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
@@ -573,19 +571,21 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
         depth_surface->MarkAsModified(true, res_cache);
 
         fbkey.zeta = depth_surface->Texture().handle;
-        fbkey.stencil_enable = regs.stencil_enable;
+        fbkey.stencil_enable = regs.stencil_enable &&
+                               depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
     }
 
     SetupCachedFramebuffer(fbkey, current_state);
-
     SyncViewport(current_state);
+
+    return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
 }
 
 void RasterizerOpenGL::Clear() {
     const auto prev_state{state};
     SCOPE_EXIT({ prev_state.Apply(); });
 
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     bool use_color{};
     bool use_depth{};
     bool use_stencil{};
@@ -646,10 +646,8 @@ void RasterizerOpenGL::Clear() {
         return;
     }
 
-    ScopeAcquireGLContext acquire_context{emu_window};
-
-    ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
-                          regs.clear_buffers.RT.Value());
+    const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
+        clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
     if (regs.clear_flags.scissor) {
         SyncScissorTest(clear_state);
     }
@@ -664,11 +662,11 @@ void RasterizerOpenGL::Clear() {
         glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
     }
 
-    if (use_depth && use_stencil) {
+    if (clear_depth && clear_stencil) {
         glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
-    } else if (use_depth) {
+    } else if (clear_depth) {
         glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
-    } else if (use_stencil) {
+    } else if (clear_stencil) {
         glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
     }
 }
@@ -678,11 +676,9 @@ void RasterizerOpenGL::DrawArrays() {
         return;
 
     MICROPROFILE_SCOPE(OpenGL_Drawing);
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    ScopeAcquireGLContext acquire_context{emu_window};
-
     ConfigureFramebuffers(state);
     SyncColorMask();
     SyncFragmentColorClampState();
@@ -728,10 +724,10 @@ void RasterizerOpenGL::DrawArrays() {
     // Add space for at least 18 constant buffers
     buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
 
-    bool invalidate = buffer_cache.Map(buffer_size);
+    const bool invalidate = buffer_cache.Map(buffer_size);
     if (invalidate) {
         // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+        gpu.dirty_flags.vertex_array.set();
     }
 
     const GLuint vao = SetupVertexFormat();
@@ -745,60 +741,45 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
-    // Execute draw call
+    res_cache.SignalPreDrawCall();
     params.DispatchDraw();
-
-    // Disable scissor test
-    state.viewports[0].scissor.enabled = false;
+    res_cache.SignalPostDrawCall();
 
     accelerate_draw = AccelDraw::Disabled;
-
-    // Unbind textures for potential future use as framebuffer attachments
-    for (auto& texture_unit : state.texture_units) {
-        texture_unit.Unbind();
-    }
-    state.Apply();
 }
 
 void RasterizerOpenGL::FlushAll() {}
 
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        res_cache.FlushRegion(addr, size);
+    if (!addr || !size) {
+        return;
     }
+    res_cache.FlushRegion(addr, size);
 }
 
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (!addr || !size) {
+        return;
+    }
     res_cache.InvalidateRegion(addr, size);
     shader_cache.InvalidateRegion(addr, size);
     global_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
 }
 
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
+                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                                             const Common::Rectangle<u32>& src_rect,
+                                             const Common::Rectangle<u32>& dst_rect) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Skip the accelerated copy and perform a slow but more accurate copy
-        return false;
-    }
-
-    res_cache.FermiCopySurface(src, dst);
-    return true;
-}
-
-bool RasterizerOpenGL::AccelerateFill(const void* config) {
-    UNREACHABLE();
+    res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
     return true;
 }
 
@@ -810,7 +791,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
-    const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
+    const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
     if (!surface) {
         return {};
     }
@@ -821,7 +802,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
         VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
     ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
     ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
-    ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
+
+    if (params.pixel_format != pixel_format) {
+        LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
+    }
 
     screen_info.display_texture = surface->Texture().handle;
 
@@ -830,121 +814,98 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 
 void RasterizerOpenGL::SamplerInfo::Create() {
     sampler.Create();
-    mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear;
-    wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap;
-    uses_depth_compare = false;
+    mag_filter = Tegra::Texture::TextureFilter::Linear;
+    min_filter = Tegra::Texture::TextureFilter::Linear;
+    wrap_u = Tegra::Texture::WrapMode::Wrap;
+    wrap_v = Tegra::Texture::WrapMode::Wrap;
+    wrap_p = Tegra::Texture::WrapMode::Wrap;
+    use_depth_compare = false;
     depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
 
-    // default is GL_LINEAR_MIPMAP_LINEAR
+    // OpenGL's default min filter is GL_NEAREST_MIPMAP_LINEAR; use plain GL_LINEAR instead
     glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    // Other attributes have correct defaults
     glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
+
+    // Other attributes have correct defaults
 }
 
 void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
-    const GLuint s = sampler.handle;
+    const GLuint sampler_id = sampler.handle;
     if (mag_filter != config.mag_filter) {
         mag_filter = config.mag_filter;
         glSamplerParameteri(
-            s, GL_TEXTURE_MAG_FILTER,
+            sampler_id, GL_TEXTURE_MAG_FILTER,
             MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
     }
-    if (min_filter != config.min_filter || mip_filter != config.mip_filter) {
+    if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
         min_filter = config.min_filter;
-        mip_filter = config.mip_filter;
-        glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
-                            MaxwellToGL::TextureFilterMode(min_filter, mip_filter));
+        mipmap_filter = config.mipmap_filter;
+        glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
+                            MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
     }
 
     if (wrap_u != config.wrap_u) {
         wrap_u = config.wrap_u;
-        glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
+        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
     }
     if (wrap_v != config.wrap_v) {
         wrap_v = config.wrap_v;
-        glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
+        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
     }
     if (wrap_p != config.wrap_p) {
         wrap_p = config.wrap_p;
-        glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
+        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
     }
 
-    if (uses_depth_compare != (config.depth_compare_enabled == 1)) {
-        uses_depth_compare = (config.depth_compare_enabled == 1);
-        if (uses_depth_compare) {
-            glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
-        } else {
-            glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
-        }
+    if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
+        use_depth_compare = enabled;
+        glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
+                            use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
     }
 
     if (depth_compare_func != config.depth_compare_func) {
         depth_compare_func = config.depth_compare_func;
-        glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC,
+        glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
                             MaxwellToGL::DepthCompareFunc(depth_compare_func));
     }
 
-    GLvec4 new_border_color;
-    if (config.srgb_conversion) {
-        new_border_color[0] = config.srgb_border_color_r / 255.0f;
-        new_border_color[1] = config.srgb_border_color_g / 255.0f;
-        new_border_color[2] = config.srgb_border_color_g / 255.0f;
-    } else {
-        new_border_color[0] = config.border_color_r;
-        new_border_color[1] = config.border_color_g;
-        new_border_color[2] = config.border_color_b;
-    }
-    new_border_color[3] = config.border_color_a;
-
-    if (border_color != new_border_color) {
+    if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
         border_color = new_border_color;
-        glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data());
+        glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
     }
 
-    const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value());
-    if (anisotropic_max != max_anisotropic) {
-        max_anisotropic = anisotropic_max;
+    if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
+        max_anisotropic = anisotropic;
         if (GLAD_GL_ARB_texture_filter_anisotropic) {
-            glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
+            glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
         } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
-            glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
+            glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
         }
     }
-    const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
-    if (lod_min != min_lod) {
-        min_lod = lod_min;
-        glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
-    }
 
-    const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f;
-    if (lod_max != max_lod) {
-        max_lod = lod_max;
-        glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod);
+    if (const float min = config.GetMinLod(); min_lod != min) {
+        min_lod = min;
+        glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
+    }
+    if (const float max = config.GetMaxLod(); max_lod != max) {
+        max_lod = max;
+        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
     }
-    const u32 bias = config.mip_lod_bias.Value();
-    // Sign extend the 13-bit value.
-    constexpr u32 mask = 1U << (13 - 1);
-    const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f;
-    if (lod_bias != bias_lod) {
-        lod_bias = bias_lod;
-        glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias);
+
+    if (const float bias = config.GetLodBias(); lod_bias != bias) {
+        lod_bias = bias;
+        glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
     }
 }
 
-u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
-                                        GLenum primitive_mode, u32 current_bindpoint) {
+void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+                                         const Shader& shader, GLuint program_handle,
+                                         BaseBindings base_bindings) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& gpu = Core::System::GetInstance().GPU();
+    const auto& gpu = system.GPU();
     const auto& maxwell3d = gpu.Maxwell3D();
     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& entries = shader->GetShaderEntries().const_buffer_entries;
-
-    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
-    std::array<GLuint, max_binds> bind_buffers;
-    std::array<GLintptr, max_binds> bind_offsets;
-    std::array<GLsizeiptr, max_binds> bind_sizes;
-
-    ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
+    const auto& entries = shader->GetShaderEntries().const_buffers;
 
     // Upload only the enabled buffers from the 16 constbuffers of each shader stage
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
@@ -952,10 +913,8 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
         const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
 
         if (!buffer.enabled) {
-            // With disabled buffers set values as zero to unbind them
-            bind_buffers[bindpoint] = 0;
-            bind_offsets[bindpoint] = 0;
-            bind_sizes[bindpoint] = 0;
+            // Set values to zero to unbind buffers
+            bind_ubo_pushbuffer.Push(0, 0, 0);
             continue;
         }
 
@@ -966,13 +925,13 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
             size = buffer.size;
 
             if (size > MaxConstbufferSize) {
-                LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
-                             MaxConstbufferSize);
+                LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
+                            MaxConstbufferSize);
                 size = MaxConstbufferSize;
             }
         } else {
             // Buffer is accessed directly, upload just what we use
-            size = used_buffer.GetSize() * sizeof(float);
+            size = used_buffer.GetSize();
         }
 
         // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
@@ -980,79 +939,56 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
         size = Common::AlignUp(size, sizeof(GLvec4));
         ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
 
-        GLintptr const_buffer_offset = buffer_cache.UploadMemory(
+        const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
             buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
 
-        // Now configure the bindpoint of the buffer inside the shader
-        glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
-                              shader->GetProgramResourceIndex(used_buffer),
-                              current_bindpoint + bindpoint);
-
-        // Prepare values for multibind
-        bind_buffers[bindpoint] = buffer_cache.GetHandle();
-        bind_offsets[bindpoint] = const_buffer_offset;
-        bind_sizes[bindpoint] = size;
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
     }
+}
 
-    glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
-                       bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
-
-    return current_bindpoint + static_cast<u32>(entries.size());
+void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+                                          const Shader& shader, GLenum primitive_mode,
+                                          BaseBindings base_bindings) {
+    const auto& entries = shader->GetShaderEntries().global_memory_entries;
+    for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+        const auto& entry{entries[bindpoint]};
+        const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+        bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
+                                  static_cast<GLsizeiptr>(region->GetSizeInBytes()));
+    }
 }
 
-u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
-                                    GLenum primitive_mode, u32 current_unit) {
+void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
+                                     GLuint program_handle, BaseBindings base_bindings) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
-    const auto& gpu = Core::System::GetInstance().GPU();
+    const auto& gpu = system.GPU();
     const auto& maxwell3d = gpu.Maxwell3D();
-    const auto& entries = shader->GetShaderEntries().texture_samplers;
+    const auto& entries = shader->GetShaderEntries().samplers;
 
-    ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
+    ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
                "Exceeded the number of active textures.");
 
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry = entries[bindpoint];
-        const u32 current_bindpoint = current_unit + bindpoint;
-
-        // Bind the uniform to the sampler.
-
-        glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
-                           shader->GetUniformLocation(entry), current_bindpoint);
-
-        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
-
-        if (!texture.enabled) {
-            state.texture_units[current_bindpoint].texture = 0;
-            continue;
-        }
+        const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+        const u32 current_bindpoint = base_bindings.sampler + bindpoint;
 
         texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
-        Surface surface = res_cache.GetTextureSurface(texture, entry);
-        if (surface != nullptr) {
-            const GLuint handle =
-                entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
-            const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
-            state.texture_units[current_bindpoint].texture = handle;
-            state.texture_units[current_bindpoint].target = target;
-            state.texture_units[current_bindpoint].swizzle.r =
-                MaxwellToGL::SwizzleSource(texture.tic.x_source);
-            state.texture_units[current_bindpoint].swizzle.g =
-                MaxwellToGL::SwizzleSource(texture.tic.y_source);
-            state.texture_units[current_bindpoint].swizzle.b =
-                MaxwellToGL::SwizzleSource(texture.tic.z_source);
-            state.texture_units[current_bindpoint].swizzle.a =
-                MaxwellToGL::SwizzleSource(texture.tic.w_source);
+
+        if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
+            state.texture_units[current_bindpoint].texture =
+                surface->Texture(entry.IsArray()).handle;
+            surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
+                                   texture.tic.w_source);
         } else {
             // Can occur when texture addr is null or its memory is unmapped/invalid
             state.texture_units[current_bindpoint].texture = 0;
         }
     }
-
-    return current_unit + static_cast<u32>(entries.size());
 }
 
 void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     const bool geometry_shaders_enabled =
         regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
     const std::size_t viewport_count =
@@ -1060,7 +996,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
     for (std::size_t i = 0; i < viewport_count; i++) {
         auto& viewport = current_state.viewports[i];
         const auto& src = regs.viewports[i];
-        const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+        const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
         viewport.x = viewport_rect.left;
         viewport.y = viewport_rect.bottom;
         viewport.width = viewport_rect.GetWidth();
@@ -1075,7 +1011,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
 void RasterizerOpenGL::SyncClipEnabled(
     const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
 
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
         regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
         regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
@@ -1092,7 +1028,7 @@ void RasterizerOpenGL::SyncClipCoef() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.cull.enabled = regs.cull.enabled != 0;
 
@@ -1116,14 +1052,14 @@ void RasterizerOpenGL::SyncCullMode() {
 }
 
 void RasterizerOpenGL::SyncPrimitiveRestart() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.primitive_restart.enabled = regs.primitive_restart.enabled;
     state.primitive_restart.index = regs.primitive_restart.index;
 }
 
 void RasterizerOpenGL::SyncDepthTestState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.depth.test_enabled = regs.depth_test_enable != 0;
     state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
@@ -1135,7 +1071,7 @@ void RasterizerOpenGL::SyncDepthTestState() {
 }
 
 void RasterizerOpenGL::SyncStencilTestState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     state.stencil.test_enabled = regs.stencil_enable != 0;
 
     if (!regs.stencil_enable) {
@@ -1169,7 +1105,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
 }
 
 void RasterizerOpenGL::SyncColorMask() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     const std::size_t count =
         regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
     for (std::size_t i = 0; i < count; i++) {
@@ -1183,18 +1119,18 @@ void RasterizerOpenGL::SyncColorMask() {
 }
 
 void RasterizerOpenGL::SyncMultiSampleState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
     state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
 }
 
 void RasterizerOpenGL::SyncFragmentColorClampState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
 }
 
 void RasterizerOpenGL::SyncBlendState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.blend_color.red = regs.blend_color.r;
     state.blend_color.green = regs.blend_color.g;
@@ -1236,7 +1172,7 @@ void RasterizerOpenGL::SyncBlendState() {
 }
 
 void RasterizerOpenGL::SyncLogicOpState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.logic_op.enabled = regs.logic_op.enable != 0;
 
@@ -1250,7 +1186,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
 }
 
 void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     const bool geometry_shaders_enabled =
         regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
     const std::size_t viewport_count =
@@ -1272,21 +1208,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
 }
 
 void RasterizerOpenGL::SyncTransformFeedback() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
-    if (regs.tfb_enabled != 0) {
-        LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
-        UNREACHABLE();
-    }
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
 }
 
 void RasterizerOpenGL::SyncPointState() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     state.point.size = regs.point_size;
 }
 
 void RasterizerOpenGL::SyncPolygonOffset() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
     state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
     state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
     state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
@@ -1296,13 +1228,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
 }
 
 void RasterizerOpenGL::CheckAlphaTests() {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
-    if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
-        LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
-                                    "this behavior is undefined.");
-        UNREACHABLE();
-    }
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
+                         "Alpha Testing is enabled with more than one rendertarget");
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 21c51f874..e4c64ae71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,21 +5,19 @@
 #pragma once
 
 #include <array>
+#include <atomic>
 #include <cstddef>
 #include <map>
 #include <memory>
 #include <optional>
 #include <tuple>
 #include <utility>
-#include <vector>
 
 #include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
 #include <glad/glad.h>
 
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -28,10 +26,13 @@
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
+#include "video_core/renderer_opengl/utils.h"
+
+namespace Core {
+class System;
+}
 
 namespace Core::Frontend {
 class EmuWindow;
@@ -45,22 +46,25 @@ struct FramebufferCacheKey;
 
 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
-    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info);
+    explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
     ~RasterizerOpenGL() override;
 
     void DrawArrays() override;
     void Clear() override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                               const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
-    bool AccelerateFill(const void* config) override;
+                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                               const Common::Rectangle<u32>& src_rect,
+                               const Common::Rectangle<u32>& dst_rect) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
     bool AccelerateDrawBatch(bool is_indexed) override;
-    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
+    void LoadDiskResources(const std::atomic_bool& stop_loading,
+                           const VideoCore::DiskResourceLoadCallback& callback) override;
 
     /// Maximum supported size that a constbuffer can have in bytes.
     static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -85,11 +89,12 @@ private:
     private:
         Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
         Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
-        Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None;
+        Tegra::Texture::TextureMipmapFilter mipmap_filter =
+            Tegra::Texture::TextureMipmapFilter::None;
         Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
         Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
         Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
-        bool uses_depth_compare = false;
+        bool use_depth_compare = false;
         Tegra::Texture::DepthCompareFunc depth_compare_func =
             Tegra::Texture::DepthCompareFunc::Always;
         GLvec4 border_color = {};
@@ -122,30 +127,25 @@ private:
      * @param using_depth_fb If true, configure the depth/stencil framebuffer.
      * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
      * @param single_color_target Specifies if a single color buffer target should be used.
+     * @returns A pair of flags telling whether depth (first) and stencil (second) are stored in
+     * the bound zeta texture (requires using_depth_fb to be true)
      */
-    void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
-                               bool using_depth_fb = true, bool preserve_contents = true,
-                               std::optional<std::size_t> single_color_target = {});
+    std::pair<bool, bool> ConfigureFramebuffers(
+        OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true,
+        bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
 
-    /**
-     * Configures the current constbuffers to use for the draw command.
-     * @param stage The shader stage to configure buffers for.
-     * @param shader The shader object that contains the specified stage.
-     * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
-     * @returns The next available bindpoint for use in the next shader stage.
-     */
-    u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
-                          GLenum primitive_mode, u32 current_bindpoint);
+    /// Configures the current constbuffers to use for the draw command.
+    void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
+                           GLuint program_handle, BaseBindings base_bindings);
 
-    /**
-     * Configures the current textures to use for the draw command.
-     * @param stage The shader stage to configure textures for.
-     * @param shader The shader object that contains the specified stage.
-     * @param current_unit The offset at which to start counting unused texture units.
-     * @returns The next available bindpoint for use in the next shader stage.
-     */
-    u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
-                      GLenum primitive_mode, u32 current_unit);
+    /// Configures the current global memory entries to use for the draw command.
+    void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+                            const Shader& shader, GLenum primitive_mode,
+                            BaseBindings base_bindings);
+
+    /// Configures the current textures to use for the draw command.
+    void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
+                       GLuint program_handle, BaseBindings base_bindings);
 
     /// Syncs the viewport and depth range to match the guest state
     void SyncViewport(OpenGLState& current_state);
@@ -209,7 +209,7 @@ private:
     ShaderCacheOpenGL shader_cache;
     GlobalRegionCacheOpenGL global_cache;
 
-    Core::Frontend::EmuWindow& emu_window;
+    Core::System& system;
 
     ScreenInfo& screen_info;
 
@@ -221,6 +221,7 @@ private:
 
     std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
     FramebufferConfigState current_framebuffer_config_state;
+    std::pair<bool, bool> current_depth_stencil_usage{};
 
     std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
 
@@ -229,6 +230,9 @@ private:
     PrimitiveAssembler primitive_assembler{buffer_cache};
     GLint uniform_buffer_alignment;
 
+    BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+    BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+
     std::size_t CalculateVertexArraysSize() const;
 
     std::size_t CalculateIndexBufferSize() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index a05b8b936..55b6d8591 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <optional>
 #include <glad/glad.h>
 
 #include "common/alignment.h"
@@ -12,16 +13,15 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
-#include "core/memory.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
-#include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/surface.h"
-#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
 #include "video_core/textures/decoders.h"
 
 namespace OpenGL {
@@ -44,23 +44,22 @@ struct FormatTuple {
     bool compressed;
 };
 
-static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) {
-    glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
+static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
+    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
     if (max_mip_level == 1) {
-        glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0);
+        glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
     }
 }
 
-void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
 
-    addr = cpu_addr ? *cpu_addr : 0;
     gpu_addr = gpu_addr_;
+    host_ptr = memory_manager.GetPointer(gpu_addr_);
     size_in_bytes = SizeInBytesRaw();
 
     if (IsPixelFormatASTC(pixel_format)) {
@@ -126,8 +125,12 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
 
     params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
     params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    if (!params.is_tiled) {
+        params.pitch = config.tic.Pitch();
+    }
     params.unaligned_height = config.tic.Height();
     params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+    params.identity = SurfaceClass::Uploaded;
 
     switch (params.target) {
     case SurfaceTarget::Texture1D:
@@ -167,6 +170,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     }
 
     params.is_layered = SurfaceTargetIsLayered(params.target);
+    params.is_array = SurfaceTargetIsArray(params.target);
     params.max_mip_level = config.tic.max_mip_level + 1;
     params.rt = {};
 
@@ -190,10 +194,17 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
                              config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
     params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
-    params.width = config.width;
+    if (params.is_tiled) {
+        params.width = config.width;
+    } else {
+        params.pitch = config.width;
+        const u32 bpp = params.GetFormatBpp() / 8;
+        params.width = params.pitch / bpp;
+    }
     params.height = config.height;
     params.unaligned_height = config.height;
     params.target = SurfaceTarget::Texture2D;
+    params.identity = SurfaceClass::RenderTarget;
     params.depth = 1;
     params.max_mip_level = 1;
     params.is_layered = false;
@@ -211,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
 }
 
 /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
-    u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+    u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
     u32 block_width, u32 block_height, u32 block_depth,
     Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
     SurfaceParams params{};
@@ -229,6 +240,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     params.height = zeta_height;
     params.unaligned_height = zeta_height;
     params.target = SurfaceTarget::Texture2D;
+    params.identity = SurfaceClass::DepthBuffer;
     params.depth = 1;
     params.max_mip_level = 1;
     params.is_layered = false;
@@ -254,9 +266,14 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
     params.width = config.width;
+    if (!params.is_tiled) {
+        const u32 bpp = params.GetFormatBpp() / 8;
+        params.pitch = config.width * bpp;
+    }
     params.height = config.height;
     params.unaligned_height = config.height;
     params.target = SurfaceTarget::Texture2D;
+    params.identity = SurfaceClass::Copy;
     params.depth = 1;
     params.max_mip_level = 1;
     params.rt = {};
@@ -386,7 +403,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
     return format;
 }
 
-MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+/// Maps a surface target to its array/non-array GL counterpart (GL_NONE if it has none)
+constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+        return GL_TEXTURE_1D_ARRAY;
+    case SurfaceTarget::Texture2D:
+        return GL_TEXTURE_2D_ARRAY;
+    case SurfaceTarget::Texture3D:
+        return GL_NONE;
+    case SurfaceTarget::Texture1DArray:
+        return GL_TEXTURE_1D;
+    case SurfaceTarget::Texture2DArray:
+        return GL_TEXTURE_2D;
+    case SurfaceTarget::TextureCubemap:
+        return GL_TEXTURE_CUBE_MAP_ARRAY;
+    case SurfaceTarget::TextureCubeArray:
+        return GL_TEXTURE_CUBE_MAP;
+    }
+    return GL_NONE;
+}
+
+Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
     u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
     if (IsPixelFormatASTC(pixel_format)) {
         // ASTC formats must stop at the ATSC block size boundary
@@ -410,8 +448,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
         for (u32 i = 0; i < params.depth; i++) {
             MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                           params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
-                          params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
-                          gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
+                          params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
+                          gl_buffer.data() + offset_gl, params.host_ptr + offset);
             offset += layer_size;
             offset_gl += gl_size;
         }
@@ -420,11 +458,12 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
         MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                       params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                       params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
-                      gl_buffer.data(), gl_buffer.size(), params.addr + offset);
+                      gl_buffer.data(), params.host_ptr + offset);
     }
 }
 
-static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
+void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
+                                            const Surface& dst_surface) {
     const auto& src_params{src_surface->GetSurfaceParams()};
     const auto& dst_params{dst_surface->GetSurfaceParams()};
 
@@ -434,12 +473,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
     glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
                        0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
                        0, 0, width, height, 1);
+
+    dst_surface->MarkAsModified(true, *this);
 }
 
 MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
-static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
-                        const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
-                        const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) {
+void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                                        const GLuint copy_pbo_handle, const GLenum src_attachment,
+                                        const GLenum dst_attachment,
+                                        const std::size_t cubemap_face) {
     MICROPROFILE_SCOPE(OpenGL_CopySurface);
     ASSERT_MSG(dst_attachment == 0, "Unimplemented");
 
@@ -474,9 +516,9 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
                               "reinterpretation but the texture is tiled.");
         }
         const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
-
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
         glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
-                        Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
+                        memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
     }
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -519,172 +561,58 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
         }
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
     }
+
+    dst_surface->MarkAsModified(true, *this);
 }
 
 CachedSurface::CachedSurface(const SurfaceParams& params)
-    : params(params), gl_target(SurfaceTargetToGL(params.target)),
-      cached_size_in_bytes(params.size_in_bytes) {
-    texture.Create();
-    const auto& rect{params.GetRect()};
-
-    // Keep track of previous texture bindings
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-
-    cur_state.texture_units[0].texture = texture.handle;
-    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
-    cur_state.Apply();
-    glActiveTexture(GL_TEXTURE0);
-
-    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    gl_internal_format = format_tuple.internal_format;
-    gl_is_compressed = format_tuple.compressed;
+    : RasterizerCacheObject{params.host_ptr}, params{params},
+      gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
 
-    if (!format_tuple.compressed) {
-        // Only pre-create the texture for non-compressed textures.
-        switch (params.target) {
-        case SurfaceTarget::Texture1D:
-            glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth());
-            break;
-        case SurfaceTarget::Texture2D:
-        case SurfaceTarget::TextureCubemap:
-            glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
-            break;
-        case SurfaceTarget::Texture3D:
-        case SurfaceTarget::Texture2DArray:
-        case SurfaceTarget::TextureCubeArray:
-            glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
-                           params.depth);
-            break;
-        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
-                         static_cast<u32>(params.target));
-            UNREACHABLE();
-            glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
-                           rect.GetWidth(), rect.GetHeight());
-        }
-    }
+    const auto optional_cpu_addr{
+        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
+    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
+    cpu_addr = *optional_cpu_addr;
 
-    ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);
+    texture.Create(gl_target);
 
-    LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
-                  SurfaceParams::SurfaceTargetName(params.target));
+    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
+    // alternatives. This signals a bug in those functions.
+    const auto width = static_cast<GLsizei>(params.MipWidth(0));
+    const auto height = static_cast<GLsizei>(params.MipHeight(0));
+    memory_size = params.MemorySize();
+    reinterpreted = false;
 
-    // Clamp size to mapped GPU memory region
-    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
-    // R32F render buffer. We do not yet know if this is a game bug or something else, but this
-    // check is necessary to prevent flushing from overwriting unmapped memory.
-
-    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
-    if (cached_size_in_bytes > max_size) {
-        LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
-        cached_size_in_bytes = max_size;
-    }
-}
-
-static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
-    union S8Z24 {
-        BitField<0, 24, u32> z24;
-        BitField<24, 8, u32> s8;
-    };
-    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
-
-    union Z24S8 {
-        BitField<0, 8, u32> s8;
-        BitField<8, 24, u32> z24;
-    };
-    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
-
-    S8Z24 s8z24_pixel{};
-    Z24S8 z24s8_pixel{};
-    constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
-    for (std::size_t y = 0; y < height; ++y) {
-        for (std::size_t x = 0; x < width; ++x) {
-            const std::size_t offset{bpp * (y * width + x)};
-            if (reverse) {
-                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
-                s8z24_pixel.s8.Assign(z24s8_pixel.s8);
-                s8z24_pixel.z24.Assign(z24s8_pixel.z24);
-                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
-            } else {
-                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
-                z24s8_pixel.s8.Assign(s8z24_pixel.s8);
-                z24s8_pixel.z24.Assign(s8z24_pixel.z24);
-                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
-            }
-        }
-    }
-}
+    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
+    gl_internal_format = format_tuple.internal_format;
 
-/**
- * Helper function to perform software conversion (as needed) when loading a buffer from Switch
- * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
- * typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
-                                               u32 width, u32 height, u32 depth) {
-    switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_8X5:
-    case PixelFormat::ASTC_2D_5X4:
-    case PixelFormat::ASTC_2D_5X5:
-    case PixelFormat::ASTC_2D_4X4_SRGB:
-    case PixelFormat::ASTC_2D_8X8_SRGB:
-    case PixelFormat::ASTC_2D_8X5_SRGB:
-    case PixelFormat::ASTC_2D_5X4_SRGB:
-    case PixelFormat::ASTC_2D_5X5_SRGB:
-    case PixelFormat::ASTC_2D_10X8:
-    case PixelFormat::ASTC_2D_10X8_SRGB: {
-        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
-        u32 block_width{};
-        u32 block_height{};
-        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
-        data =
-            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
-        break;
-    }
-    case PixelFormat::S8Z24:
-        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
-        ConvertS8Z24ToZ24S8(data, width, height, false);
+    switch (params.target) {
+    case SurfaceTarget::Texture1D:
+        glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width);
         break;
-    }
-}
-
-/**
- * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
- * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
- * with typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
-                                                u32 width, u32 height) {
-    switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_4X4_SRGB:
-    case PixelFormat::ASTC_2D_8X8_SRGB:
-    case PixelFormat::ASTC_2D_5X5:
-    case PixelFormat::ASTC_2D_5X5_SRGB:
-    case PixelFormat::ASTC_2D_10X8:
-    case PixelFormat::ASTC_2D_10X8_SRGB: {
-        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
-                     static_cast<u32>(pixel_format));
-        UNREACHABLE();
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::TextureCubemap:
+        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height);
         break;
-    }
-    case PixelFormat::S8Z24:
-        // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
-        ConvertS8Z24ToZ24S8(data, width, height, true);
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubeArray:
+        glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height, params.depth);
         break;
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                     static_cast<u32>(params.target));
+        UNREACHABLE();
+        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height);
     }
+
+    ApplyTextureDefaults(texture.handle, params.max_mip_level);
+
+    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -699,13 +627,31 @@ void CachedSurface::LoadGLBuffer() {
         for (u32 i = 0; i < params.max_mip_level; i++)
             SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
     } else {
-        const auto texture_src_data{Memory::GetPointer(params.addr)};
-        const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
-        gl_buffer[0].assign(texture_src_data, texture_src_data_end);
+        const u32 bpp = params.GetFormatBpp() / 8;
+        const u32 copy_size = params.width * bpp;
+        if (params.pitch == copy_size) {
+            std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
+        } else {
+            const u8* start{params.host_ptr};
+            u8* write_to = gl_buffer[0].data();
+            for (u32 h = params.height; h > 0; h--) {
+                std::memcpy(write_to, start, copy_size);
+                start += params.pitch;
+                write_to += copy_size;
+            }
+        }
     }
     for (u32 i = 0; i < params.max_mip_level; i++) {
-        ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
-                                           params.MipHeight(i), params.MipDepth(i));
+        const u32 width = params.MipWidth(i);
+        const u32 height = params.MipHeight(i);
+        const u32 depth = params.MipDepth(i);
+        if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
+            // Reserve size for RGBA8 conversion
+            constexpr std::size_t rgba_bpp = 4;
+            gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
+        }
+        Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
+                                               height, depth, true, true);
     }
 }
 
@@ -720,26 +666,35 @@ void CachedSurface::FlushGLBuffer() {
     gl_buffer[0].resize(GetSizeInBytes());
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+    glPixelStorei(GL_PACK_ALIGNMENT, align);
     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
     ASSERT(!tuple.compressed);
     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
     glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
                       static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
     glPixelStorei(GL_PACK_ROW_LENGTH, 0);
-    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
-                                        params.height);
-    ASSERT(params.type != SurfaceType::Fill);
-    const u8* const texture_src_data = Memory::GetPointer(params.addr);
-    ASSERT(texture_src_data);
+    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
+                                           params.height, params.depth, true, true);
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                    params.block_width, static_cast<u32>(params.target));
 
         SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
     } else {
-        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
+        const u32 bpp = params.GetFormatBpp() / 8;
+        const u32 copy_size = params.width * bpp;
+        if (params.pitch == copy_size) {
+            std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
+        } else {
+            u8* start{params.host_ptr};
+            const u8* read_to = gl_buffer[0].data();
+            for (u32 h = params.height; h > 0; h--) {
+                std::memcpy(start, read_to, copy_size);
+                start += params.pitch;
+                read_to += copy_size;
+            }
+        }
     }
 }
 
@@ -748,63 +703,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
     const auto& rect{params.GetRect(mip_map)};
 
     // Load data from memory to the surface
-    const GLint x0 = static_cast<GLint>(rect.left);
-    const GLint y0 = static_cast<GLint>(rect.bottom);
-    std::size_t buffer_offset =
+    const auto x0 = static_cast<GLint>(rect.left);
+    const auto y0 = static_cast<GLint>(rect.bottom);
+    auto buffer_offset =
         static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                  static_cast<std::size_t>(x0)) *
         GetBytesPerPixel(params.pixel_format);
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    const GLuint target_tex = texture.handle;
-    OpenGLState cur_state = OpenGLState::GetCurState();
-
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-    cur_state.texture_units[0].texture = target_tex;
-    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
-    cur_state.Apply();
-
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+
+    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
 
-    GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
-    glActiveTexture(GL_TEXTURE0);
+    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
     if (tuple.compressed) {
         switch (params.target) {
         case SurfaceTarget::Texture2D:
-            glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage2D(
+                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
+                &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture3D:
-            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
-                                   static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage3D(
+                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)),
+                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
+                &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture2DArray:
         case SurfaceTarget::TextureCubeArray:
-            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
-                                   static_cast<GLsizei>(params.depth), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage3D(
+                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
+                tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::TextureCubemap: {
-            GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
+            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
             for (std::size_t face = 0; face < params.depth; ++face) {
-                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
-                                       mip_map, tuple.internal_format,
-                                       static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                       static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
-                                       layer_size, &gl_buffer[mip_map][buffer_offset]);
+                glCompressedTextureSubImage3D(
+                    texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
+                    static_cast<GLsizei>(params.MipWidth(mip_map)),
+                    static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
+                    layer_size, &gl_buffer[mip_map][buffer_offset]);
                 buffer_offset += layer_size;
             }
             break;
@@ -813,46 +755,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
-            glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
-                                   static_cast<GLsizei>(params.size_in_bytes_gl),
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage2D(
+                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
+                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
         }
     } else {
-
         switch (params.target) {
         case SurfaceTarget::Texture1D:
-            glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
-                            static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
+                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture2D:
-            glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture3D:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
-                            tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
+                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture2DArray:
         case SurfaceTarget::TextureCubeArray:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
-                            tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
+                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::TextureCubemap: {
             std::size_t start = buffer_offset;
             for (std::size_t face = 0; face < params.depth; ++face) {
-                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
-                                x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                                &gl_buffer[mip_map][buffer_offset]);
+                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
+                                    static_cast<GLsizei>(rect.GetWidth()),
+                                    static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
+                                    tuple.type, &gl_buffer[mip_map][buffer_offset]);
                 buffer_offset += params.LayerSizeGL(mip_map);
             }
             break;
@@ -861,51 +800,62 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
-            glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[mip_map][buffer_offset]);
         }
     }
 
     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }
 
-void CachedSurface::EnsureTextureView() {
-    if (texture_view.handle != 0)
+void CachedSurface::EnsureTextureDiscrepantView() {
+    if (discrepant_view.handle != 0) // The view is created once and reused afterwards
         return;
-    // Compressed texture are not being created with immutable storage
-    UNIMPLEMENTED_IF(gl_is_compressed);
-
-    const GLenum target{TargetLayer()};
-
-    texture_view.Create();
-    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
-                  params.max_mip_level, 0, 1);
-
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-    cur_state.texture_units[0].texture = texture_view.handle;
-    cur_state.texture_units[0].target = target;
-    cur_state.Apply();
-
-    ApplyTextureDefaults(target, params.max_mip_level);
+    // Lazily create discrepant_view as a view of texture with GetArrayDiscrepantTarget's target
+    const GLenum target{GetArrayDiscrepantTarget(params.target)};
+    ASSERT(target != GL_NONE);
+
+    const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; // 6 faces per cube
+    constexpr GLuint min_layer = 0;
+    constexpr GLuint min_level = 0;
+    // glTextureView takes a name from glGenTextures that has not been given a target yet
+    glGenTextures(1, &discrepant_view.handle);
+    glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
+                  params.max_mip_level, min_layer, num_layers);
+    ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
+    glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, // Mirror the swizzle
+                         reinterpret_cast<const GLint*>(swizzle.data()));
 }
 
 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
 void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
-    if (params.type == SurfaceType::Fill)
-        return;
-
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
     for (u32 i = 0; i < params.max_mip_level; i++)
         UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
 }
 
+// Applies the given swizzle to the texture (and to the discrepant view, if it exists) on change.
+void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
+                                  Tegra::Texture::SwizzleSource swizzle_y,
+                                  Tegra::Texture::SwizzleSource swizzle_z,
+                                  Tegra::Texture::SwizzleSource swizzle_w) {
+    const GLenum gl_x = MaxwellToGL::SwizzleSource(swizzle_x);
+    const GLenum gl_y = MaxwellToGL::SwizzleSource(swizzle_y);
+    const GLenum gl_z = MaxwellToGL::SwizzleSource(swizzle_z);
+    const GLenum gl_w = MaxwellToGL::SwizzleSource(swizzle_w);
+    if (swizzle[0] != gl_x || swizzle[1] != gl_y || swizzle[2] != gl_z || swizzle[3] != gl_w) {
+        swizzle = {gl_x, gl_y, gl_z, gl_w};
+        const auto* const params_ptr = reinterpret_cast<const GLint*>(swizzle.data());
+        glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, params_ptr);
+        if (discrepant_view.handle != 0) {
+            glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, params_ptr);
+        }
+    }
+}
+
 RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
     : RasterizerCache{rasterizer} {
     read_framebuffer.Create();
@@ -943,42 +893,45 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
     auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
     const auto& regs{gpu.regs};
 
-    if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
-        return last_color_buffers[index];
+    if (!gpu.dirty_flags.color_buffer[index]) {
+        return current_color_buffers[index];
     }
-    gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+    gpu.dirty_flags.color_buffer.reset(index);
 
     ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
 
     if (index >= regs.rt_control.count) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
     }
 
     if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
     }
 
     const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
 
-    return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+    return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
     surface->LoadGLBuffer();
     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
     surface->MarkAsModified(false, *this);
+    surface->MarkForReload(false); // The pending reload is satisfied by the upload above
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
-    if (params.addr == 0 || params.height * params.width == 0) {
+    if (!params.IsValid()) {
         return {};
     }
 
     // Look up surface in the cache based on address
-    Surface surface{TryGet(params.addr)};
+    Surface surface{TryGet(params.host_ptr)};
     if (surface) {
         if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
-            // Use the cached surface as-is
+            // Use the cached surface as-is unless it's not synced with memory
+            if (surface->MustReload())
+                LoadSurface(surface);
             return surface;
         } else if (preserve_contents) {
             // If surface parameters changed and we care about keeping the previous data, recreate
@@ -986,6 +939,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
             Surface new_surface{RecreateSurface(surface, params)};
             Unregister(surface);
             Register(new_surface);
+            if (new_surface->IsUploaded()) {
+                RegisterReinterpretSurface(new_surface);
+            }
             return new_surface;
         } else {
             // Delete the old surface before creating a new one to prevent collisions.
@@ -1019,14 +975,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
                                                    const Surface& dst_surface) {
     const auto& init_params{src_surface->GetSurfaceParams()};
     const auto& dst_params{dst_surface->GetSurfaceParams()};
-    VAddr address = init_params.addr;
-    const std::size_t layer_size = dst_params.LayerMemorySize();
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    GPUVAddr address{init_params.gpu_addr};
+    const std::size_t layer_size{dst_params.LayerMemorySize()};
     for (u32 layer = 0; layer < dst_params.depth; layer++) {
         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
-            const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
-            const Surface& copy = TryGet(sub_address);
-            if (!copy)
+            const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
+            if (!copy) {
                 continue;
+            }
             const auto& src_params{copy->GetSurfaceParams()};
             const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
             const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1038,26 +996,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
         }
         address += layer_size;
     }
+
+    dst_surface->MarkAsModified(true, *this);
+}
+
+static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
+                        const Common::Rectangle<u32>& src_rect,
+                        const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
+                        std::size_t cubemap_face = 0) {
+    // Blits src_rect of src_surface into dst_rect of dst_surface; always returns true.
+    const auto& src_params{src_surface->GetSurfaceParams()};
+    const auto& dst_params{dst_surface->GetSurfaceParams()};
+
+    OpenGLState prev_state{OpenGLState::GetCurState()};
+    SCOPE_EXIT({ prev_state.Apply(); }); // Re-apply the caller's GL state on exit
+    // Default-constructed state with only the two helper framebuffers bound
+    OpenGLState state;
+    state.draw.read_framebuffer = read_fb_handle;
+    state.draw.draw_framebuffer = draw_fb_handle;
+    state.Apply();
+
+    u32 buffers{}; // glBlitFramebuffer mask; stays 0 if the source type matches no branch
+
+    if (src_params.type == SurfaceType::ColorTexture) { // Attach color, detach depth-stencil
+        switch (src_params.target) {
+        case SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceTarget::Texture2DArray: // Attaches level 0 of layer 0 only
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                      src_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+        case SurfaceTarget::Texture3D: // Attaches level 0 of slice 0 only
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   SurfaceTargetToGL(src_params.target),
+                                   src_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0);
+            break;
+        default: // Any other target is attached as if it were a 2D texture
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        switch (dst_params.target) {
+        case SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceTarget::Texture2DArray: // Attaches level 0 of layer 0 only
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                      dst_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+
+        case SurfaceTarget::Texture3D: // Attaches level 0 of slice 0 only
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   SurfaceTargetToGL(dst_params.target),
+                                   dst_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(dst_params.target), 0, 0, 0);
+            break;
+        default: // Any other target is attached as if it were a 2D texture
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        buffers = GL_COLOR_BUFFER_BIT;
+    } else if (src_params.type == SurfaceType::Depth) { // Depth only: clear color and stencil
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT;
+    } else if (src_params.type == SurfaceType::DepthStencil) { // Combined depth-stencil
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+    }
+    // Color-only blits are filtered linearly; every other mask uses nearest filtering
+    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
+                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
+                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+
+    return true;
 }
 
 void RasterizerCacheOpenGL::FermiCopySurface(
     const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) {
+    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
+    const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
 
     const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
     const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
 
-    ASSERT(src_params.width == dst_params.width);
-    ASSERT(src_params.height == dst_params.height);
     ASSERT(src_params.pixel_format == dst_params.pixel_format);
     ASSERT(src_params.block_height == dst_params.block_height);
     ASSERT(src_params.is_tiled == dst_params.is_tiled);
     ASSERT(src_params.depth == dst_params.depth);
-    ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
     ASSERT(src_params.target == dst_params.target);
     ASSERT(src_params.rt.index == dst_params.rt.index);
 
-    FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
+    const Surface src_surface = GetSurface(src_params, true); // Both keep their old contents
+    const Surface dst_surface = GetSurface(dst_params, true);
+    if (!src_surface || !dst_surface)
+        return; // GetSurface() yields an empty Surface for invalid params; nothing to blit
+    BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
+                draw_framebuffer.handle);
+    dst_surface->MarkAsModified(true, *this); // The blit wrote to the destination texture
 }
 
 void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
@@ -1066,7 +1159,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
     const auto& dst_params{dst_surface->GetSurfaceParams()};
 
     // Flush enough memory for both the source and destination surface
-    FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+    FlushRegion(ToCacheAddr(src_params.host_ptr),
+                std::max(src_params.MemorySize(), dst_params.MemorySize()));
 
     LoadSurface(dst_surface);
 }
@@ -1085,10 +1179,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
         return new_surface;
     }
 
+    const bool old_compressed =
+        GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
+    const bool new_compressed =
+        GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
+    const bool compatible_formats =
+        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
+        !(old_compressed || new_compressed);
     // For compatible surfaces, we can just do fast glCopyImageSubData based copy
-    if (old_params.target == new_params.target && old_params.type == new_params.type &&
-        old_params.depth == new_params.depth && old_params.depth == 1 &&
-        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
+    if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
+        old_params.depth == 1 && compatible_formats) {
         FastCopySurface(old_surface, new_surface);
         return new_surface;
     }
@@ -1103,7 +1203,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
     case SurfaceTarget::TextureCubemap:
     case SurfaceTarget::Texture2DArray:
     case SurfaceTarget::TextureCubeArray:
-        FastLayeredCopySurface(old_surface, new_surface);
+        if (compatible_formats)
+            FastLayeredCopySurface(old_surface, new_surface);
+        else {
+            AccurateCopySurface(old_surface, new_surface);
+        }
         break;
     default:
         LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1114,8 +1218,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
     return new_surface;
 }
 
-Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
-    return TryGet(addr);
+Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
+    return TryGet(host_ptr);
 }
 
 void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1132,4 +1236,108 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
     return {};
 }
 
+/// Returns the mip level of params whose single-level size and height both match, if any.
+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams& params,
+                                            u32 height) {
+    for (u32 level = 0; level < params.max_mip_level; ++level) {
+        if (memory == params.GetMipmapSingleSize(level) && params.MipHeight(level) == height)
+            return level;
+    }
+    return std::nullopt;
+}
+
+/// Returns the layer of params whose mip level `mipmap` starts at addr, if there is one.
+static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams& params, u32 mipmap) {
+    const std::size_t layer_stride{params.LayerMemorySize()};
+    GPUVAddr candidate{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
+    for (u32 layer = 0; layer < params.depth; ++layer) {
+        if (candidate == addr)
+            return layer;
+        candidate += layer_stride; // Same mip level, next layer
+    }
+    return std::nullopt;
+}
+
+/// Copies render_surface into the matching mip level and layer of blitted_surface, if any.
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface& render_surface,
+                                       const Surface& blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    const std::optional<u32> level =
+        TryFindBestMipMap(src_params.size_in_bytes, dst_params, src_params.height);
+    if (!level) {
+        return false;
+    }
+    if (src_params.width != dst_params.MipWidthGobAligned(*level) ||
+        src_params.height != dst_params.MipHeight(*level) ||
+        src_params.block_height < dst_params.MipBlockHeight(*level)) {
+        return false;
+    }
+    const std::optional<u32> slot = TryFindBestLayer(src_params.gpu_addr, dst_params, *level);
+    if (!slot) {
+        return false;
+    }
+    glCopyImageSubData(render_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0,
+                       0, 0, 0, blitted_surface->Texture().handle,
+                       SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+                       dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+    blitted_surface->MarkAsModified(true, cache);
+    return true;
+}
+
+/// True when render_surface ends past blitted_surface or their component types differ.
+static bool IsReinterpretInvalid(const Surface& render_surface, const Surface& blitted_surface) {
+    const VAddr blitted_end = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
+    const VAddr render_end = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
+    if (render_end > blitted_end)
+        return true;
+    return blitted_surface->GetSurfaceParams().component_type !=
+           render_surface->GetSurfaceParams().component_type;
+}
+
+static bool IsReinterpretInvalidSecond(const Surface& render_surface,
+                                       const Surface& blitted_surface) {
+    const auto& src = render_surface->GetSurfaceParams();
+    const auto& dst = blitted_surface->GetSurfaceParams();
+    return dst.height > src.height && dst.width > src.width; // blitted larger on both axes
+}
+
+/// Reconciles intersect with triggering_surface; returns false when intersect was unregistered.
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+                                                      Surface intersect) {
+    if (!IsReinterpretInvalid(triggering_surface, intersect)) {
+        if (LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+            return true;
+        }
+        if (!IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+            FlushObject(intersect);
+            FlushObject(triggering_surface);
+            intersect->MarkForReload(true);
+            return true;
+        }
+    }
+    Unregister(intersect);
+    return false;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+    const bool needs_barrier = texception && GLAD_GL_ARB_texture_barrier;
+    texception = false; // The pending flag is consumed whether or not a barrier is issued
+    if (needs_barrier)
+        glTextureBarrier();
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+    // Reconcile each bound color buffer with the reinterpreted surface it collides with, if any
+    for (u32 index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        const Surface& color_buffer = current_color_buffers[index];
+        if (!color_buffer)
+            continue;
+        if (Surface intersect = CollideOnReinterpretedSurface(color_buffer->GetCacheAddr())) {
+            PartialReinterpretSurface(color_buffer, intersect);
+            texception = true;
+        }
+    }
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 37611c4fc..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -5,12 +5,13 @@
 #pragma once
 
 #include <array>
-#include <map>
 #include <memory>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #include "common/alignment.h"
+#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -27,14 +28,22 @@ namespace OpenGL {
 
 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
-using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
 
 using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
 using SurfaceType = VideoCore::Surface::SurfaceType;
 using PixelFormat = VideoCore::Surface::PixelFormat;
 using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct SurfaceParams {
+    enum class SurfaceClass {
+        Uploaded,
+        RenderTarget,
+        DepthBuffer,
+        Copy,
+    };
+
     static std::string SurfaceTargetName(SurfaceTarget target) {
         switch (target) {
         case SurfaceTarget::Texture1D:
@@ -63,7 +72,7 @@ struct SurfaceParams {
     }
 
     /// Returns the rectangle corresponding to this surface
-    MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
+    Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
 
     /// Returns the total size of this surface in bytes, adjusted for compression
     std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -100,6 +109,11 @@ struct SurfaceParams {
         return size;
     }
 
+    /// Checks that a GPU address, a host pointer and non-zero dimensions are all present.
+    bool IsValid() const {
+        return gpu_addr != 0 && host_ptr != nullptr && height != 0 && width != 0;
+    }
+
     /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
     /// mipmaps.
     std::size_t LayerMemorySize() const {
@@ -132,10 +146,18 @@ struct SurfaceParams {
         return offset;
     }
 
+    std::size_t GetMipmapSingleSize(u32 mip_level) const { // presumably one level's size - confirm
+        return InnerMipmapMemorySize(mip_level, false, is_layered);
+    }
+
     u32 MipWidth(u32 mip_level) const {
         return std::max(1U, width >> mip_level);
     }
 
+    u32 MipWidthGobAligned(u32 mip_level) const { // Mip width aligned to 64 * 8 / GetFormatBpp()
+        return Common::AlignUp(MipWidth(mip_level), 64U * 8U / GetFormatBpp());
+    }
+
     u32 MipHeight(u32 mip_level) const {
         return std::max(1U, height >> mip_level);
     }
@@ -160,23 +182,37 @@ struct SurfaceParams {
     }
 
     u32 MipBlockDepth(u32 mip_level) const {
-        if (mip_level == 0)
+        if (mip_level == 0) {
             return block_depth;
-        if (is_layered)
+        }
+
+        if (is_layered) {
             return 1;
-        u32 depth = MipDepth(mip_level);
+        }
+
+        const u32 mip_depth = MipDepth(mip_level);
         u32 bd = 32;
-        while (bd > 1 && depth * 2 <= bd) {
+        while (bd > 1 && mip_depth * 2 <= bd) {
             bd >>= 1;
         }
+
         if (bd == 32) {
-            u32 bh = MipBlockHeight(mip_level);
-            if (bh >= 4)
+            const u32 bh = MipBlockHeight(mip_level);
+            if (bh >= 4) {
                 return 16;
+            }
         }
+
         return bd;
     }
 
+    /// Returns 1 shifted left by the trailing-zero count of a mip row's size in bytes.
+    u32 RowAlign(u32 mip_level) const {
+        const u32 row_bytes = MipWidth(mip_level) * GetBytesPerPixel(pixel_format);
+        const u32 shift = Common::CountTrailingZeroes32(row_bytes);
+        return 1U << shift;
+    }
+
     /// Creates SurfaceParams from a texture configuration
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
                                           const GLShader::SamplerEntry& entry);
@@ -186,7 +222,7 @@ struct SurfaceParams {
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(
-        u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+        u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
         u32 block_width, u32 block_height, u32 block_depth,
         Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
 
@@ -208,7 +244,49 @@ struct SurfaceParams {
     }
 
     /// Initializes parameters for caching, should be called after everything has been initialized
-    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+    void InitCacheParameters(GPUVAddr gpu_addr);
+
+    std::string TargetName() const { // Short tag for the surface target, used by IdentityString()
+        switch (target) {
+        case SurfaceTarget::Texture1D:
+            return "1D";
+        case SurfaceTarget::Texture2D:
+            return "2D";
+        case SurfaceTarget::Texture3D:
+            return "3D";
+        case SurfaceTarget::Texture1DArray:
+            return "1DArray";
+        case SurfaceTarget::Texture2DArray:
+            return "2DArray";
+        case SurfaceTarget::TextureCubemap:
+            return "Cube";
+        default: // NOTE(review): TextureCubeArray has no case above and lands here
+            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+            UNREACHABLE();
+            return fmt::format("TUK({})", static_cast<u32>(target));
+        }
+    }
+
+    std::string ClassName() const { // Two-letter tag for the surface's SurfaceClass
+        switch (identity) {
+        case SurfaceClass::Uploaded:
+            return "UP";
+        case SurfaceClass::RenderTarget:
+            return "RT";
+        case SurfaceClass::DepthBuffer:
+            return "DB";
+        case SurfaceClass::Copy:
+            return "CP";
+        default: // All four SurfaceClass enumerators are handled above
+            LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
+            UNREACHABLE();
+            return fmt::format("CUK({})", static_cast<u32>(identity));
+        }
+    }
+
+    std::string IdentityString() const { // "<class>_<target>_<T|L>": T if tiled, L otherwise
+        return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
+    }
 
     bool is_tiled;
     u32 block_width;
@@ -222,13 +300,16 @@ struct SurfaceParams {
     u32 height;
     u32 depth;
     u32 unaligned_height;
+    u32 pitch;
     SurfaceTarget target;
+    SurfaceClass identity;
     u32 max_mip_level;
     bool is_layered;
+    bool is_array;
     bool srgb_conversion;
     // Parameters used for caching
-    VAddr addr;
-    Tegra::GPUVAddr gpu_addr;
+    u8* host_ptr;
+    GPUVAddr gpu_addr;
     std::size_t size_in_bytes;
     std::size_t size_in_bytes_gl;
 
@@ -255,6 +336,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
     static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
         SurfaceReserveKey res;
         res.state = params;
+        res.state.identity = {}; // Ignore the origin of the texture
         res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
         res.state.rt = {};       // Ignore rt config in caching
         return res;
@@ -275,16 +357,20 @@ class RasterizerOpenGL;
 
 class CachedSurface final : public RasterizerCacheObject {
 public:
-    CachedSurface(const SurfaceParams& params);
+    explicit CachedSurface(const SurfaceParams& params);
 
-    VAddr GetAddr() const override {
-        return params.addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
         return cached_size_in_bytes;
     }
 
+    std::size_t GetMemorySize() const { // Plain accessor for the memory_size member
+        return memory_size;
+    }
+
     void Flush() override {
         FlushGLBuffer();
     }
@@ -293,31 +379,19 @@ public:
         return texture;
     }
 
-    const OGLTexture& TextureLayer() {
-        if (params.is_layered) {
-            return Texture();
+    const OGLTexture& Texture(bool as_array) {
+        if (params.is_array == as_array) {
+            return texture;
+        } else {
+            EnsureTextureDiscrepantView();
+            return discrepant_view;
         }
-        EnsureTextureView();
-        return texture_view;
     }
 
     GLenum Target() const {
         return gl_target;
     }
 
-    GLenum TargetLayer() const {
-        using VideoCore::Surface::SurfaceTarget;
-        switch (params.target) {
-        case SurfaceTarget::Texture1D:
-            return GL_TEXTURE_1D_ARRAY;
-        case SurfaceTarget::Texture2D:
-            return GL_TEXTURE_2D_ARRAY;
-        case SurfaceTarget::TextureCubemap:
-            return GL_TEXTURE_CUBE_MAP_ARRAY;
-        }
-        return Target();
-    }
-
     const SurfaceParams& GetSurfaceParams() const {
         return params;
     }
@@ -329,19 +403,48 @@ public:
     // Upload data in gl_buffer to this surface's texture
     void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
 
+    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
+                       Tegra::Texture::SwizzleSource swizzle_y,
+                       Tegra::Texture::SwizzleSource swizzle_z,
+                       Tegra::Texture::SwizzleSource swizzle_w);
+
+    void MarkReinterpreted() {
+        reinterpreted = true;
+    }
+
+    bool IsReinterpreted() const {
+        return reinterpreted;
+    }
+
+    void MarkForReload(bool reload) {
+        must_reload = reload;
+    }
+
+    bool MustReload() const {
+        return must_reload;
+    }
+
+    bool IsUploaded() const {
+        return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+    }
+
 private:
     void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
 
-    void EnsureTextureView();
+    void EnsureTextureDiscrepantView();
 
     OGLTexture texture;
-    OGLTexture texture_view;
+    OGLTexture discrepant_view;
     std::vector<std::vector<u8>> gl_buffer;
     SurfaceParams params{};
     GLenum gl_target{};
     GLenum gl_internal_format{};
-    bool gl_is_compressed{};
     std::size_t cached_size_in_bytes{};
+    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+    std::size_t memory_size{}; // Value-initialized so GetMemorySize() never reads garbage
+    bool reinterpreted = false;
+    bool must_reload = false;
+    VAddr cpu_addr{};
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -359,11 +462,16 @@ public:
     Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
 
     /// Tries to find a framebuffer using on the provided CPU address
-    Surface TryFindFramebufferSurface(VAddr addr) const;
+    Surface TryFindFramebufferSurface(const u8* host_ptr) const;
 
     /// Copies the contents of one surface to another
     void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config);
+                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
+                          const Common::Rectangle<u32>& src_rect,
+                          const Common::Rectangle<u32>& dst_rect);
+
+    void SignalPreDrawCall();
+    void SignalPostDrawCall();
 
 private:
     void LoadSurface(const Surface& surface);
@@ -381,9 +489,17 @@ private:
     /// Tries to get a reserved surface for the specified parameters
     Surface TryGetReservedSurface(const SurfaceParams& params);
 
+    // Partially reinterprets a surface based on a triggering_surface that collides with it.
+    // Returns true if the reinterpret was successful, false in case it was not.
+    bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
     /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
     void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
     void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
+    void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
+    void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                     const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
+                     const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
 
     /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
     /// previously been used. This is to prevent surfaces from being constantly created and
@@ -393,12 +509,54 @@ private:
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
 
+    bool texception = false;
+
     /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
     /// using the new format.
     OGLBuffer copy_pbo;
 
-    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
     Surface last_depth_buffer;
+
+    using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
+    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+    static auto GetReinterpretInterval(const Surface& object) {
+        return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
+                                           object->GetCacheAddr() + object->GetMemorySize() - 1);
+    }
+
+    // Reinterpreted surfaces are very fragile as the game may keep rendering into them.
+    SurfaceIntervalCache reinterpreted_surfaces;
+
+    void RegisterReinterpretSurface(Surface reinterpret_surface) {
+        auto interval = GetReinterpretInterval(reinterpret_surface);
+        reinterpreted_surfaces.insert({interval, reinterpret_surface});
+        reinterpret_surface->MarkReinterpreted();
+    }
+
+    Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
+        const SurfaceInterval interval{addr};
+        for (auto& pair :
+             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
+            return pair.second;
+        }
+        return nullptr;
+    }
+
+    void Register(const Surface& object) override {
+        RasterizerCache<Surface>::Register(object);
+    }
+
+    /// Unregisters an object from the cache
+    void Unregister(const Surface& object) override {
+        if (object->IsReinterpreted()) {
+            auto interval = GetReinterpretInterval(object);
+            reinterpreted_surfaces.erase(interval);
+        }
+        RasterizerCache<Surface>::Unregister(object);
+    }
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 1da744158..bfe666a73 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
 
 namespace OpenGL {
 
-void OGLTexture::Create() {
+void OGLTexture::Create(GLenum target) {
     if (handle != 0)
         return;
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenTextures(1, &handle);
+    glCreateTextures(target, 1, &handle);
 }
 
 void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
 }
 
 void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
-                                  const char* frag_shader, bool separable_program) {
+                                  const char* frag_shader, bool separable_program,
+                                  bool hint_retrievable) {
     OGLShader vert, geo, frag;
     if (vert_shader)
         vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
         frag.Create(frag_shader, GL_FRAGMENT_SHADER);
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    Create(separable_program, vert.handle, geo.handle, frag.handle);
+    Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
 }
 
 void OGLProgram::Release() {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index e33f1e973..fbb93ee49 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -28,7 +28,7 @@ public:
     }
 
     /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
+    void Create(GLenum target);
 
     /// Deletes the internal OpenGL resource
     void Release();
@@ -101,15 +101,15 @@ public:
     }
 
     template <typename... T>
-    void Create(bool separable_program, T... shaders) {
+    void Create(bool separable_program, bool hint_retrievable, T... shaders) {
         if (handle != 0)
             return;
-        handle = GLShader::LoadProgram(separable_program, shaders...);
+        handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
     }
 
     /// Creates a new internal OpenGL resource and stores the handle
     void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
-                          bool separable_program = false);
+                          bool separable_program = false, bool hint_retrievable = false);
 
     /// Deletes the internal OpenGL resource
     void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c785fffa3..99f67494c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,200 +6,515 @@
 #include "common/assert.h"
 #include "common/hash.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/shader_ir.h"
 
 namespace OpenGL {
 
+using VideoCommon::Shader::ProgramCode;
+
+// One UBO is always reserved for emulation values
+constexpr u32 RESERVED_UBOS = 1;
+
+struct UnspecializedShader {
+    std::string code;
+    GLShader::ShaderEntries entries;
+    Maxwell::ShaderProgram program_type;
+};
+
+namespace {
+
 /// Gets the address for the specified shader stage program
-static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
-    return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
-                                               shader_config.offset);
+GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+    const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
 }
 
 /// Gets the shader program code from memory for the specified address
-static GLShader::ProgramCode GetShaderCode(VAddr addr) {
-    GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
-    Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
+ProgramCode GetShaderCode(const u8* host_ptr) {
+    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(program_code.begin(), program_code.end(), 0);
+        return program_code;
+    });
+    std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
     return program_code;
 }
 
-/// Helper function to set shader uniform block bindings for a single shader stage
-static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
-                                         Maxwell::ShaderStage binding, std::size_t expected_size) {
-    const GLuint ub_index = glGetUniformBlockIndex(shader, name);
-    if (ub_index == GL_INVALID_INDEX) {
-        return;
+/// Gets the shader type from a Maxwell program type
+constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
+    switch (program_type) {
+    case Maxwell::ShaderProgram::VertexA:
+    case Maxwell::ShaderProgram::VertexB:
+        return GL_VERTEX_SHADER;
+    case Maxwell::ShaderProgram::Geometry:
+        return GL_GEOMETRY_SHADER;
+    case Maxwell::ShaderProgram::Fragment:
+        return GL_FRAGMENT_SHADER;
+    default:
+        return GL_NONE;
     }
+}
 
-    GLint ub_size = 0;
-    glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
-    ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
-               "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
-    glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
+/// Gets if the current instruction offset is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
 }
 
-/// Sets shader uniform block bindings for an entire shader program
-static void SetShaderUniformBlockBindings(GLuint shader) {
-    SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
-                                 sizeof(GLShader::MaxwellUniformData));
-    SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
-                                 sizeof(GLShader::MaxwellUniformData));
-    SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
-                                 sizeof(GLShader::MaxwellUniformData));
+/// Describes primitive behavior on geometry shaders
+constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
+    switch (primitive_mode) {
+    case GL_POINTS:
+        return {"points", "Points", 1};
+    case GL_LINES:
+    case GL_LINE_STRIP:
+        return {"lines", "Lines", 2};
+    case GL_LINES_ADJACENCY:
+    case GL_LINE_STRIP_ADJACENCY:
+        return {"lines_adjacency", "LinesAdj", 4};
+    case GL_TRIANGLES:
+    case GL_TRIANGLE_STRIP:
+    case GL_TRIANGLE_FAN:
+        return {"triangles", "Triangles", 3};
+    case GL_TRIANGLES_ADJACENCY:
+    case GL_TRIANGLE_STRIP_ADJACENCY:
+        return {"triangles_adjacency", "TrianglesAdj", 6};
+    default:
+        return {"points", "Invalid", 1};
+    }
 }
 
-CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
-    : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+    constexpr std::size_t start_offset = 10;
+    std::size_t offset = start_offset;
+    std::size_t size = start_offset * sizeof(u64);
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if (instruction == 0 || (instruction >> 52) == 0x50b) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+        }
+        size += sizeof(u64);
+        offset++;
+    }
+    // The last instruction is included in the program size
+    return std::min(size + sizeof(u64), program.size() * sizeof(u64));
+}
+
+/// Hashes one (or two) program streams
+u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
+                        const ProgramCode& code_b) {
+    u64 unique_identifier =
+        Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
+    if (program_type != Maxwell::ShaderProgram::VertexA) {
+        return unique_identifier;
+    }
+    // VertexA programs include two programs
 
-    GLShader::ProgramResult program_result;
-    GLenum gl_type{};
+    std::size_t seed = 0;
+    boost::hash_combine(seed, unique_identifier);
 
-    switch (program_type) {
-    case Maxwell::ShaderProgram::VertexA:
+    const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
+                                                CalculateProgramSize(code_b));
+    boost::hash_combine(seed, identifier_b);
+    return static_cast<u64>(seed);
+}
+
+/// Creates an unspecialized program from code streams
+GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
+                                      ProgramCode program_code_b) {
+    GLShader::ShaderSetup setup(program_code);
+    if (program_type == Maxwell::ShaderProgram::VertexA) {
         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
         // Conventional HW does not support this, so we combine VertexA and VertexB into one
         // stage here.
-        setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+        setup.SetProgramB(program_code_b);
+    }
+    setup.program.unique_identifier =
+        GetUniqueIdentifier(program_type, program_code, program_code_b);
+
+    switch (program_type) {
+    case Maxwell::ShaderProgram::VertexA:
     case Maxwell::ShaderProgram::VertexB:
-        CalculateProperties();
-        program_result = GLShader::GenerateVertexShader(setup);
-        gl_type = GL_VERTEX_SHADER;
-        break;
+        return GLShader::GenerateVertexShader(setup);
     case Maxwell::ShaderProgram::Geometry:
-        CalculateProperties();
-        program_result = GLShader::GenerateGeometryShader(setup);
-        gl_type = GL_GEOMETRY_SHADER;
-        break;
+        return GLShader::GenerateGeometryShader(setup);
     case Maxwell::ShaderProgram::Fragment:
-        CalculateProperties();
-        program_result = GLShader::GenerateFragmentShader(setup);
-        gl_type = GL_FRAGMENT_SHADER;
-        break;
+        return GLShader::GenerateFragmentShader(setup);
     default:
         LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
         UNREACHABLE();
+        return {};
+    }
+}
+
+CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
+                               Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
+                               GLenum primitive_mode, bool hint_retrievable = false) {
+    std::string source = "#version 430 core\n";
+    source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+
+    for (const auto& cbuf : entries.const_buffers) {
+        source +=
+            fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
+    }
+    for (const auto& gmem : entries.global_memory_entries) {
+        source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
+                              gmem.GetCbufOffset(), base_bindings.gmem++);
+    }
+    for (const auto& sampler : entries.samplers) {
+        source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
+                              base_bindings.sampler++);
+    }
+
+    if (program_type == Maxwell::ShaderProgram::Geometry) {
+        const auto [glsl_topology, debug_name, max_vertices] =
+            GetPrimitiveDescription(primitive_mode);
+
+        source += "layout (" + std::string(glsl_topology) + ") in;\n";
+        source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
+    }
+
+    source += code;
+
+    OGLShader shader;
+    shader.Create(source.c_str(), GetShaderType(program_type));
+
+    auto program = std::make_shared<OGLProgram>();
+    program->Create(true, hint_retrievable, shader.handle);
+    return program;
+}
+
+std::set<GLenum> GetSupportedFormats() {
+    std::set<GLenum> supported_formats;
+
+    GLint num_formats{};
+    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+
+    std::vector<GLint> formats(num_formats);
+    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+
+    for (const GLint format : formats)
+        supported_formats.insert(static_cast<GLenum>(format));
+    return supported_formats;
+}
+
+} // namespace
+
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
+                           const PrecompiledPrograms& precompiled_programs,
+                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+      unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+      precompiled_programs{precompiled_programs} {
+
+    const std::size_t code_size = CalculateProgramSize(program_code);
+    const std::size_t code_size_b =
+        program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
+
+    GLShader::ProgramResult program_result =
+        CreateProgram(program_type, program_code, program_code_b);
+    if (program_result.first.empty()) {
+        // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
         return;
     }
 
+    code = program_result.first;
     entries = program_result.second;
     shader_length = entries.shader_length;
 
-    if (program_type != Maxwell::ShaderProgram::Geometry) {
-        OGLShader shader;
-        shader.Create(program_result.first.c_str(), gl_type);
-        program.Create(true, shader.handle);
-        SetShaderUniformBlockBindings(program.handle);
-        LabelGLObject(GL_PROGRAM, program.handle, addr);
-    } else {
-        // Store shader's code to lazily build it on draw
-        geometry_programs.code = program_result.first;
-    }
+    const ShaderDiskCacheRaw raw(unique_identifier, program_type,
+                                 static_cast<u32>(code_size / sizeof(u64)),
+                                 static_cast<u32>(code_size_b / sizeof(u64)),
+                                 std::move(program_code), std::move(program_code_b));
+    disk_cache.SaveRaw(raw);
 }
 
-GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
-    const auto search{resource_cache.find(buffer.GetHash())};
-    if (search == resource_cache.end()) {
-        const GLuint index{
-            glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
-        resource_cache[buffer.GetHash()] = index;
-        return index;
-    }
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
+                           const PrecompiledPrograms& precompiled_programs,
+                           GLShader::ProgramResult result, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+      unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+      precompiled_programs{precompiled_programs} {
 
-    return search->second;
+    code = std::move(result.first); // Adopt the pre-generated shader code as-is
+    entries = std::move(result.second);
+    shader_length = entries.shader_length;
 }
 
-GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
-    const auto search{uniform_cache.find(sampler.GetHash())};
-    if (search == uniform_cache.end()) {
-        const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
-        uniform_cache[sampler.GetHash()] = index;
-        return index;
+std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
+                                                                BaseBindings base_bindings) {
+    GLuint handle{};
+    if (program_type == Maxwell::ShaderProgram::Geometry) {
+        handle = GetGeometryShader(primitive_mode, base_bindings);
+    } else {
+        const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
+        auto& program = entry->second;
+        if (is_cache_miss) {
+            program = TryLoadProgram(primitive_mode, base_bindings);
+            if (!program) {
+                program =
+                    SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
+                disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
+            }
+
+            LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
+        }
+
+        handle = program->handle;
     }
 
-    return search->second;
+    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
+    base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
+    base_bindings.sampler += static_cast<u32>(entries.samplers.size());
+
+    return {handle, base_bindings};
 }
 
-GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
-                                         const std::string& glsl_topology, u32 max_vertices,
-                                         const std::string& debug_name) {
-    if (target_program.handle != 0) {
-        return target_program.handle;
+GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
+    const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
+    auto& programs = entry->second;
+
+    switch (primitive_mode) {
+    case GL_POINTS:
+        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
+    case GL_LINES:
+    case GL_LINE_STRIP:
+        return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
+    case GL_LINES_ADJACENCY:
+    case GL_LINE_STRIP_ADJACENCY:
+        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
+    case GL_TRIANGLES:
+    case GL_TRIANGLE_STRIP:
+    case GL_TRIANGLE_FAN:
+        return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
+    case GL_TRIANGLES_ADJACENCY:
+    case GL_TRIANGLE_STRIP_ADJACENCY:
+        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
+    default:
+        UNREACHABLE_MSG("Unknown primitive mode.");
+        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
     }
-    std::string source = "#version 430 core\n";
-    source += "layout (" + glsl_topology + ") in;\n";
-    source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
-    source += geometry_programs.code;
+}
 
-    OGLShader shader;
-    shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
-    target_program.Create(true, shader.handle);
-    SetShaderUniformBlockBindings(target_program.handle);
-    LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
-    return target_program.handle;
+GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
+                                         GLenum primitive_mode) {
+    if (target_program) {
+        return target_program->handle;
+    }
+    const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
+    target_program = TryLoadProgram(primitive_mode, base_bindings);
+    if (!target_program) {
+        target_program =
+            SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
+        disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
+    }
+
+    LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
+
+    return target_program->handle;
 };
 
-static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // sched instructions appear once every 4 instructions.
-    static constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
+CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
+                                           BaseBindings base_bindings) const {
+    const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
+    if (found == precompiled_programs.end()) {
+        return {};
+    }
+    return found->second;
 }
 
-static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
-    constexpr std::size_t start_offset = 10;
-    std::size_t offset = start_offset;
-    std::size_t size = start_offset * sizeof(u64);
-    while (offset < program.size()) {
-        const u64 inst = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if (inst == 0 || (inst >> 52) == 0x50b) {
-                break;
+ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
+                                            BaseBindings base_bindings) const {
+    return {unique_identifier, base_bindings, primitive_mode};
+}
+
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
+    : RasterizerCache{rasterizer}, disk_cache{system} {}
+
+void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    const auto transferable = disk_cache.LoadTransferable();
+    if (!transferable) {
+        return;
+    }
+    const auto [raws, usages] = *transferable;
+
+    auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
+
+    const auto supported_formats{GetSupportedFormats()};
+    const auto unspecialized{
+        GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
+    if (stop_loading)
+        return;
+
+    // Build shaders
+    if (callback)
+        callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
+    for (std::size_t i = 0; i < usages.size(); ++i) {
+        if (stop_loading)
+            return;
+
+        const auto& usage{usages[i]};
+        LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
+                 i + 1, usages.size());
+
+        const auto& unspec{unspecialized.at(usage.unique_identifier)};
+        const auto dump_it = dumps.find(usage);
+
+        CachedProgram shader;
+        if (dump_it != dumps.end()) {
+            // If the shader is dumped, attempt to load it from its precompiled binary
+            shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
+            if (!shader) {
+                // Invalidate the precompiled cache if a dumped shader was rejected
+                disk_cache.InvalidatePrecompiled();
+                dumps.clear();
             }
         }
-        size += sizeof(inst);
-        offset++;
+        if (!shader) {
+            shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
+                                      usage.bindings, usage.primitive, true);
+        }
+        precompiled_programs.insert({usage, std::move(shader)});
+
+        if (callback)
+            callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
+    }
+
+    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
+    // precompiling them
+
+    for (std::size_t i = 0; i < usages.size(); ++i) {
+        const auto& usage{usages[i]};
+        if (dumps.find(usage) == dumps.end()) {
+            const auto& program = precompiled_programs.at(usage);
+            disk_cache.SaveDump(usage, program->handle);
+        }
     }
-    return size;
 }
 
-void CachedShader::CalculateProperties() {
-    setup.program.real_size = CalculateProgramSize(setup.program.code);
-    setup.program.real_size_b = 0;
-    setup.program.unique_identifier = Common::CityHash64(
-        reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size);
-    if (program_type == Maxwell::ShaderProgram::VertexA) {
-        std::size_t seed = 0;
-        boost::hash_combine(seed, setup.program.unique_identifier);
-        setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
-        const u64 identifier_b = Common::CityHash64(
-            reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
-        boost::hash_combine(seed, identifier_b);
-        setup.program.unique_identifier = static_cast<u64>(seed);
+CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
+    const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
+
+    if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
+        return {};
+    }
+
+    CachedProgram shader = std::make_shared<OGLProgram>();
+    shader->handle = glCreateProgram();
+    glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+    glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
+                    static_cast<GLsizei>(dump.binary.size()));
+
+    GLint link_status{};
+    glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
+    if (link_status == GL_FALSE) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
+        return {};
     }
+
+    return shader;
 }
 
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
+std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
+    const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
+    const std::vector<ShaderDiskCacheRaw>& raws,
+    const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
+    std::unordered_map<u64, UnspecializedShader> unspecialized;
+    // Returns an empty map if loading is stopped or a raw entry fails hash validation
+    if (callback)
+        callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
+
+    for (std::size_t i = 0; i < raws.size(); ++i) {
+        if (stop_loading)
+            return {};
+
+        const auto& raw{raws[i]};
+        const u64 unique_identifier = raw.GetUniqueIdentifier();
+        const u64 calculated_hash =
+            GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+        if (unique_identifier != calculated_hash) {
+            LOG_ERROR(
+                Render_OpenGL,
+                "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
+                raw.GetUniqueIdentifier(), calculated_hash);
+            disk_cache.InvalidateTransferable();
+            return {};
+        }
+
+        GLShader::ProgramResult result;
+        if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
+            // If it's stored in the precompiled file, avoid decompiling it here
+            const auto& stored_decompiled{it->second};
+            result = {stored_decompiled.code, stored_decompiled.entries};
+        } else {
+            // Otherwise decompile the shader at boot and save the result to the decompiled file
+            result =
+                CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+            disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
+        }
+        // Store a copy in precompiled_shaders before the result is moved into the returned map
+        precompiled_shaders.insert({unique_identifier, result});
+
+        unspecialized.insert(
+            {raw.GetUniqueIdentifier(),
+             {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
+        // Report the number of completed entries (i + 1) so progress can reach raws.size()
+        if (callback)
+            callback(VideoCore::LoadCallbackStage::Decompile, i + 1, raws.size());
+    }
+    return unspecialized;
+}
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
         return last_shaders[static_cast<u32>(program)];
     }
 
-    const VAddr program_addr{GetShaderAddress(program)};
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    const GPUVAddr program_addr{GetShaderAddress(program)};
 
     // Look up shader in the cache based on address
-    Shader shader{TryGet(program_addr)};
+    const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+    Shader shader{TryGet(host_ptr)};
 
     if (!shader) {
         // No shader found - create a new one
-        shader = std::make_shared<CachedShader>(program_addr, program);
+        ProgramCode program_code{GetShaderCode(host_ptr)};
+        ProgramCode program_code_b;
+        if (program == Maxwell::ShaderProgram::VertexA) {
+            program_code_b = GetShaderCode(
+                memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+        }
+        const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+        const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
+        const auto found = precompiled_shaders.find(unique_identifier);
+        if (found != precompiled_shaders.end()) {
+            shader =
+                std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+                                               precompiled_programs, found->second, host_ptr);
+        } else {
+            shader = std::make_shared<CachedShader>(
+                cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+                std::move(program_code), std::move(program_code_b), host_ptr);
+        }
         Register(shader);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 768747968..0cf8e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,29 +5,51 @@
 #pragma once
 
 #include <array>
-#include <map>
+#include <atomic>
 #include <memory>
+#include <set>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+#include <glad/glad.h>
 
-#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+
+namespace Core {
+class System;
+} // namespace Core
 
 namespace OpenGL {
 
 class CachedShader;
 class RasterizerOpenGL;
+struct UnspecializedShader;
 
 using Shader = std::shared_ptr<CachedShader>;
+using CachedProgram = std::shared_ptr<OGLProgram>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
+using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
 
 class CachedShader final : public RasterizerCacheObject {
 public:
-    CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
-
-    VAddr GetAddr() const override {
-        return addr;
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                          Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
+                          const PrecompiledPrograms& precompiled_programs,
+                          ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
+
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                          Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
+                          const PrecompiledPrograms& precompiled_programs,
+                          GLShader::ProgramResult result, u8* host_ptr);
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
@@ -43,82 +65,76 @@ public:
     }
 
     /// Gets the GL program handle for the shader
-    GLuint GetProgramHandle(GLenum primitive_mode) {
-        if (program_type != Maxwell::ShaderProgram::Geometry) {
-            return program.handle;
-        }
-        switch (primitive_mode) {
-        case GL_POINTS:
-            return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
-        case GL_LINES:
-        case GL_LINE_STRIP:
-            return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
-        case GL_LINES_ADJACENCY:
-        case GL_LINE_STRIP_ADJACENCY:
-            return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
-                                       "ShaderLinesAdjacency");
-        case GL_TRIANGLES:
-        case GL_TRIANGLE_STRIP:
-        case GL_TRIANGLE_FAN:
-            return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
-                                       "ShaderTriangles");
-        case GL_TRIANGLES_ADJACENCY:
-        case GL_TRIANGLE_STRIP_ADJACENCY:
-            return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
-                                       6, "ShaderTrianglesAdjacency");
-        default:
-            UNREACHABLE_MSG("Unknown primitive mode.");
-            return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
-        }
-    }
+    std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
+                                                      BaseBindings base_bindings);
 
-    /// Gets the GL program resource location for the specified resource, caching as needed
-    GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
+private:
+    // Geometry programs. These are needed because GLSL needs an input topology but it's not
+    // declared by the hardware. Work around this issue by generating a different shader per input
+    // topology class.
+    struct GeometryPrograms {
+        CachedProgram points;
+        CachedProgram lines;
+        CachedProgram lines_adjacency;
+        CachedProgram triangles;
+        CachedProgram triangles_adjacency;
+    };
 
-    /// Gets the GL uniform location for the specified resource, caching as needed
-    GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
+    GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
 
-private:
     /// Generates a geometry shader or returns one that already exists.
-    GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
-                               u32 max_vertices, const std::string& debug_name);
+    GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
+                               GLenum primitive_mode);
+
+    CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
 
-    void CalculateProperties();
+    ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
 
-    VAddr addr;
-    std::size_t shader_length;
-    Maxwell::ShaderProgram program_type;
-    GLShader::ShaderSetup setup;
+    u8* host_ptr{};
+    VAddr cpu_addr{};
+    u64 unique_identifier{};
+    Maxwell::ShaderProgram program_type{};
+    ShaderDiskCacheOpenGL& disk_cache;
+    const PrecompiledPrograms& precompiled_programs;
+
+    std::size_t shader_length{};
     GLShader::ShaderEntries entries;
 
-    // Non-geometry program.
-    OGLProgram program;
+    std::string code;
 
-    // Geometry programs. These are needed because GLSL needs an input topology but it's not
-    // declared by the hardware. Workaround this issue by generating a different shader per input
-    // topology class.
-    struct {
-        std::string code;
-        OGLProgram points;
-        OGLProgram lines;
-        OGLProgram lines_adjacency;
-        OGLProgram triangles;
-        OGLProgram triangles_adjacency;
-    } geometry_programs;
-
-    std::map<u32, GLuint> resource_cache;
-    std::map<u32, GLint> uniform_cache;
+    std::unordered_map<BaseBindings, CachedProgram> programs;
+    std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
+
+    std::unordered_map<u32, GLuint> cbuf_resource_cache;
+    std::unordered_map<u32, GLuint> gmem_resource_cache;
+    std::unordered_map<u32, GLint> uniform_cache;
 };
 
 class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
+    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
+
+    /// Loads disk cache for the current game
+    void LoadDiskCache(const std::atomic_bool& stop_loading,
+                       const VideoCore::DiskResourceLoadCallback& callback);
 
     /// Gets the current specified shader stage program
     Shader GetStageProgram(Maxwell::ShaderProgram program);
 
 private:
+    std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
+        const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
+        const std::vector<ShaderDiskCacheRaw>& raws,
+        const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
+
+    CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
+                                             const std::set<GLenum>& supported_formats);
+
     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+
+    ShaderDiskCacheOpenGL disk_cache;
+    PrecompiledShaders precompiled_shaders;
+    PrecompiledPrograms precompiled_programs;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1bb09e61b..28e490b3c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2,247 +2,51 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <map>
-#include <optional>
-#include <set>
+#include <array>
 #include <string>
 #include <string_view>
-#include <unordered_set>
+#include <utility>
+#include <variant>
+#include <vector>
 
 #include <fmt/format.h>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/shader/shader_ir.h"
 
-namespace OpenGL::GLShader::Decompiler {
+namespace OpenGL::GLShader {
+
+namespace {
 
 using Tegra::Shader::Attribute;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::LogicOperation;
-using Tegra::Shader::OpCode;
+using Tegra::Shader::AttributeUse;
+using Tegra::Shader::Header;
+using Tegra::Shader::IpaInterpMode;
+using Tegra::Shader::IpaMode;
+using Tegra::Shader::IpaSampleMode;
 using Tegra::Shader::Register;
-using Tegra::Shader::Sampler;
-using Tegra::Shader::SubOp;
-
-constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
-
-constexpr u32 MAX_GEOMETRY_BUFFERS = 6;
-constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested
-
-static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag",
-                                            "overflow_flag"};
-
-enum class InternalFlag : u64 {
-    ZeroFlag = 0,
-    SignFlag = 1,
-    CarryFlag = 2,
-    OverflowFlag = 3,
-    Amount
-};
-
-class DecompileFail : public std::runtime_error {
-public:
-    using std::runtime_error::runtime_error;
-};
-
-/// Generates code to use for a swizzle operation.
-static std::string GetSwizzle(u64 elem) {
-    ASSERT(elem <= 3);
-    std::string swizzle = ".";
-    swizzle += "xyzw"[elem];
-    return swizzle;
-}
-
-/// Translate topology
-static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
-    switch (topology) {
-    case Tegra::Shader::OutputTopology::PointList:
-        return "points";
-    case Tegra::Shader::OutputTopology::LineStrip:
-        return "line_strip";
-    case Tegra::Shader::OutputTopology::TriangleStrip:
-        return "triangle_strip";
-    default:
-        UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
-        return "points";
-    }
-}
-
-/// Describes the behaviour of code path of a given entry point and a return point.
-enum class ExitMethod {
-    Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
-    AlwaysReturn, ///< All code paths reach the return point.
-    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
-    AlwaysEnd,    ///< All code paths reach a END instruction.
-};
+using namespace VideoCommon::Shader;
 
-/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction.
-struct Subroutine {
-    /// Generates a name suitable for GLSL source code.
-    std::string GetName() const {
-        return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
-    }
-
-    u32 begin;                 ///< Entry point of the subroutine.
-    u32 end;                   ///< Return point of the subroutine.
-    const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
-    ExitMethod exit_method;    ///< Exit method of the subroutine.
-    std::set<u32> labels;      ///< Addresses refereced by JMP instructions.
-
-    bool operator<(const Subroutine& rhs) const {
-        return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
-    }
-};
-
-/// Analyzes shader code and produces a set of subroutines.
-class ControlFlowAnalyzer {
-public:
-    ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
-        : program_code(program_code), shader_coverage_begin(main_offset),
-          shader_coverage_end(main_offset + 1) {
-
-        // Recursively finds all subroutines.
-        const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
-        if (program_main.exit_method != ExitMethod::AlwaysEnd)
-            throw DecompileFail("Program does not always end");
-    }
-
-    std::set<Subroutine> GetSubroutines() {
-        return std::move(subroutines);
-    }
-
-    std::size_t GetShaderLength() const {
-        return shader_coverage_end * sizeof(u64);
-    }
-
-private:
-    const ProgramCode& program_code;
-    std::set<Subroutine> subroutines;
-    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
-    u32 shader_coverage_begin;
-    u32 shader_coverage_end;
-
-    /// Adds and analyzes a new subroutine if it is not added yet.
-    const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
-        Subroutine subroutine{begin, end, suffix, ExitMethod::Undetermined, {}};
-
-        const auto iter = subroutines.find(subroutine);
-        if (iter != subroutines.end()) {
-            return *iter;
-        }
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
+using Operation = const OperationNode&;
 
-        subroutine.exit_method = Scan(begin, end, subroutine.labels);
-        if (subroutine.exit_method == ExitMethod::Undetermined) {
-            throw DecompileFail("Recursive function detected");
-        }
-
-        return *subroutines.insert(std::move(subroutine)).first;
-    }
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
 
-    /// Merges exit method of two parallel branches.
-    static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
-        if (a == ExitMethod::Undetermined) {
-            return b;
-        }
-        if (b == ExitMethod::Undetermined) {
-            return a;
-        }
-        if (a == b) {
-            return a;
-        }
-        return ExitMethod::Conditional;
-    }
-
-    /// Scans a range of code for labels and determines the exit method.
-    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
-        const auto [iter, inserted] =
-            exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
-        ExitMethod& exit_method = iter->second;
-        if (!inserted)
-            return exit_method;
-
-        for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
-            shader_coverage_begin = std::min(shader_coverage_begin, offset);
-            shader_coverage_end = std::max(shader_coverage_end, offset + 1);
-
-            const Instruction instr = {program_code[offset]};
-            if (const auto opcode = OpCode::Decode(instr)) {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::EXIT: {
-                    // The EXIT instruction can be predicated, which means that the shader can
-                    // conditionally end on this instruction. We have to consider the case where the
-                    // condition is not met and check the exit method of that other basic block.
-                    using Tegra::Shader::Pred;
-                    if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
-                        return exit_method = ExitMethod::AlwaysEnd;
-                    } else {
-                        const ExitMethod not_met = Scan(offset + 1, end, labels);
-                        return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
-                    }
-                }
-                case OpCode::Id::BRA: {
-                    const u32 target = offset + instr.bra.GetBranchTarget();
-                    labels.insert(target);
-                    const ExitMethod no_jmp = Scan(offset + 1, end, labels);
-                    const ExitMethod jmp = Scan(target, end, labels);
-                    return exit_method = ParallelExit(no_jmp, jmp);
-                }
-                case OpCode::Id::SSY:
-                case OpCode::Id::PBK: {
-                    // The SSY and PBK use a similar encoding as the BRA instruction.
-                    UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                                         "Constant buffer branching is not supported");
-                    const u32 target = offset + instr.bra.GetBranchTarget();
-                    labels.insert(target);
-                    // Continue scanning for an exit method.
-                    break;
-                }
-                }
-            }
-        }
-        return exit_method = ExitMethod::AlwaysReturn;
-    }
-};
+struct TextureAoffi {};
+using TextureArgument = std::pair<Type, Node>;
+using TextureIR = std::variant<TextureAoffi, TextureArgument>;
 
-template <typename T>
-class ShaderScopedScope {
-public:
-    explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr)
-        : writer(writer), end_expr(std::move(end_expr)) {
-
-        if (begin_expr.empty()) {
-            writer.AddLine('{');
-        } else {
-            writer.AddExpression(begin_expr);
-            writer.AddLine(" {");
-        }
-        ++writer.scope;
-    }
-
-    ShaderScopedScope(const ShaderScopedScope&) = delete;
-
-    ~ShaderScopedScope() {
-        --writer.scope;
-        if (end_expr.empty()) {
-            writer.AddLine('}');
-        } else {
-            writer.AddExpression("} ");
-            writer.AddExpression(end_expr);
-            writer.AddLine(';');
-        }
-    }
-
-    ShaderScopedScope& operator=(const ShaderScopedScope&) = delete;
-
-private:
-    T& writer;
-    std::string end_expr;
-};
+enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
+    static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
+constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
+    static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
 
 class ShaderWriter {
 public:
@@ -271,16 +75,17 @@ public:
         shader_source += '\n';
     }
 
-    std::string GetResult() {
-        return std::move(shader_source);
+    std::string GenerateTemporary() {
+        std::string temporary = "tmp";
+        temporary += std::to_string(temporary_index++);
+        return temporary;
     }
 
-    ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {},
-                                          std::string end_expr = {}) {
-        return ShaderScopedScope(*this, begin_expr, end_expr);
+    std::string GetResult() {
+        return std::move(shader_source);
     }
 
-    int scope = 0;
+    s32 scope = 0;
 
 private:
     void AppendIndentation() {
@@ -288,3663 +93,1553 @@ private:
     }
 
     std::string shader_source;
+    u32 temporary_index = 1;
 };
 
-/**
- * Represents an emulated shader register, used to track the state of that register for emulation
- * with GLSL. At this time, a register can be used as a float or an integer. This class is used for
- * bookkeeping within the GLSL program.
- */
-class GLSLRegister {
-public:
-    enum class Type {
-        Float,
-        Integer,
-        UnsignedInteger,
-    };
-
-    GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
+/// Builds the GLSL swizzle suffix (".x", ".y", ".z" or ".w") selecting component elem.
+std::string GetSwizzle(u32 elem) {
+    ASSERT(elem <= 3);
+    // A dot followed by the component letter picked from "xyzw"
+    const char component = "xyzw"[elem];
+    return std::string{'.', component};
+}
 
-    /// Gets the GLSL type string for a register
-    static std::string GetTypeString() {
-        return "float";
+/// Translate topology
+std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+    switch (topology) {
+    case Tegra::Shader::OutputTopology::PointList:
+        return "points";
+    case Tegra::Shader::OutputTopology::LineStrip:
+        return "line_strip";
+    case Tegra::Shader::OutputTopology::TriangleStrip:
+        return "triangle_strip";
+    default:
+        UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+        return "points";
     }
+}
 
-    /// Gets the GLSL register prefix string, used for declarations and referencing
-    static std::string GetPrefixString() {
-        return "reg_";
-    }
+/// Returns true if an object has to be treated as precise
+bool IsPrecise(Operation operand) {
+    const auto& meta = operand.GetMeta();
 
-    /// Returns a GLSL string representing the current state of the register
-    std::string GetString() const {
-        return GetPrefixString() + std::to_string(index) + '_' + suffix;
+    if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
+        return arithmetic->precise;
     }
-
-    /// Returns the index of the register
-    std::size_t GetIndex() const {
-        return index;
+    if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
+        return half_arithmetic->precise;
     }
+    return false;
+}
 
-private:
-    const std::size_t index;
-    const std::string& suffix;
-};
+/// Returns true when node holds an OperationNode that IsPrecise(Operation) reports as precise;
+/// any other kind of node is never precise
+bool IsPrecise(Node node) {
+    const auto* const operation = std::get_if<OperationNode>(node);
+    return operation != nullptr && IsPrecise(*operation);
+}
 
-/**
- * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
- * of all registers (e.g. whether they are currently being used as Floats or Integers), and
- * generates the necessary GLSL code to perform conversions as needed. This class is used for
- * bookkeeping within the GLSL program.
- */
-class GLSLRegisterManager {
+class GLSLDecompiler final {
 public:
-    GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
-                        const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
-                        const Tegra::Shader::Header& header)
-        : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
-          fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
-        BuildRegisterList();
-        BuildInputList();
-    }
-
-    void SetConditionalCodesFromExpression(const std::string& expresion) {
-        SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0");
-        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete.");
-    }
-
-    void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) {
-        SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem)));
-    }
-
-    /**
-     * Returns code that does an integer size conversion for the specified size.
-     * @param value Value to perform integer size conversion on.
-     * @param size Register size to use for conversion instructions.
-     * @returns GLSL string corresponding to the value converted to the specified size.
-     */
-    static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
-        switch (size) {
-        case Register::Size::Byte:
-            return "((" + value + " << 24) >> 24)";
-        case Register::Size::Short:
-            return "((" + value + " << 16) >> 16)";
-        case Register::Size::Word:
-            // Default - do nothing
-            return value;
-        default:
-            UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
-            return value;
-        }
-    }
+    explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix)
+        : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
 
-    /**
-     * Gets a register as an float.
-     * @param reg The register to get.
-     * @param elem The element to use for the operation.
-     * @returns GLSL string corresponding to the register as a float.
-     */
-    std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) {
-        return GetRegister(reg, elem);
-    }
-
-    /**
-     * Gets a register as an integer.
-     * @param reg The register to get.
-     * @param elem The element to use for the operation.
-     * @param is_signed Whether to get the register as a signed (or unsigned) integer.
-     * @param size Register size to use for conversion instructions.
-     * @returns GLSL string corresponding to the register as an integer.
-     */
-    std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
-                                     Register::Size size = Register::Size::Word) {
-        const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
-        const std::string value{func + '(' + GetRegister(reg, elem) + ')'};
-        return ConvertIntegerSize(value, size);
-    }
-
-    /**
-     * Writes code that does a register assignment to float value operation.
-     * @param reg The destination register to use.
-     * @param elem The element to use for the operation.
-     * @param value The code representing the value to assign.
-     * @param dest_num_components Number of components in the destination.
-     * @param value_num_components Number of components in the value.
-     * @param is_saturated Optional, when True, saturates the provided value.
-     * @param sets_cc Optional, when True, sets the corresponding values to the implemented
-     * condition flags.
-     * @param dest_elem Optional, the destination element to use for the operation.
-     */
-    void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
-                            u64 dest_num_components, u64 value_num_components,
-                            bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0,
-                            bool precise = false) {
-        const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value;
-        SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem,
-                    precise);
-        if (sets_cc) {
-            if (reg == Register::ZeroIndex) {
-                SetConditionalCodesFromExpression(clamped_value);
-            } else {
-                SetConditionalCodesFromRegister(reg, dest_elem);
-            }
-        }
-    }
+    void Decompile() {
+        DeclareVertex();
+        DeclareGeometry();
+        DeclareRegisters();
+        DeclarePredicates();
+        DeclareLocalMemory();
+        DeclareInternalFlags();
+        DeclareInputAttributes();
+        DeclareOutputAttributes();
+        DeclareConstantBuffers();
+        DeclareGlobalMemory();
+        DeclareSamplers();
 
-    /**
-     * Writes code that does a register assignment to integer value operation.
-     * @param reg The destination register to use.
-     * @param elem The element to use for the operation.
-     * @param value The code representing the value to assign.
-     * @param dest_num_components Number of components in the destination.
-     * @param value_num_components Number of components in the value.
-     * @param is_saturated Optional, when True, saturates the provided value.
-     * @param sets_cc Optional, when True, sets the corresponding values to the implemented
-     * condition flags.
-     * @param dest_elem Optional, the destination element to use for the operation.
-     * @param size Register size to use for conversion instructions.
-     */
-    void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
-                              const std::string& value, u64 dest_num_components,
-                              u64 value_num_components, bool is_saturated = false,
-                              bool sets_cc = false, u64 dest_elem = 0,
-                              Register::Size size = Register::Size::Word) {
-        UNIMPLEMENTED_IF(is_saturated);
-        const std::string final_value = ConvertIntegerSize(value, size);
-        const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
-
-        SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components,
-                    value_num_components, dest_elem, false);
-
-        if (sets_cc) {
-            if (reg == Register::ZeroIndex) {
-                SetConditionalCodesFromExpression(final_value);
-            } else {
-                SetConditionalCodesFromRegister(reg, dest_elem);
-            }
-        }
-    }
+        code.AddLine("void execute_" + suffix + "() {");
+        ++code.scope;
 
-    /**
-     * Writes code that does a register assignment to a half float value operation.
-     * @param reg The destination register to use.
-     * @param elem The element to use for the operation.
-     * @param value The code representing the value to assign. Type has to be half float.
-     * @param merge Half float kind of assignment.
-     * @param dest_num_components Number of components in the destination.
-     * @param value_num_components Number of components in the value.
-     * @param is_saturated Optional, when True, saturates the provided value.
-     * @param dest_elem Optional, the destination element to use for the operation.
-     */
-    void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value,
-                                Tegra::Shader::HalfMerge merge, u64 dest_num_components,
-                                u64 value_num_components, bool is_saturated = false,
-                                u64 dest_elem = 0) {
-        UNIMPLEMENTED_IF(is_saturated);
-
-        const std::string result = [&]() {
-            switch (merge) {
-            case Tegra::Shader::HalfMerge::H0_H1:
-                return "uintBitsToFloat(packHalf2x16(" + value + "))";
-            case Tegra::Shader::HalfMerge::F32:
-                // Half float instructions take the first component when doing a float cast.
-                return "float(" + value + ".x)";
-            case Tegra::Shader::HalfMerge::Mrg_H0:
-                // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the
-                // pack. I couldn't test this on hardware but it shouldn't really matter since most
-                // of the time when a Mrg_* flag is used both components will be mirrored. That
-                // being said, it deserves a test.
-                return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
-                       " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))";
-            case Tegra::Shader::HalfMerge::Mrg_H1:
-                return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
-                       " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))";
-            default:
-                UNREACHABLE();
-                return std::string("0");
-            }
-        }();
+        // VM's program counter
+        const auto first_address = ir.GetBasicBlocks().begin()->first;
+        code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;");
 
-        SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
-    }
+        // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
+        // unlikely that shaders will use 20 nested SSYs and PBKs.
+        constexpr u32 FLOW_STACK_SIZE = 20;
+        code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE));
+        code.AddLine("uint flow_stack_top = 0u;");
 
-    /**
-     * Writes code that does a register assignment to input attribute operation. Input attributes
-     * are stored as floats, so this may require conversion.
-     * @param reg The destination register to use.
-     * @param elem The element to use for the operation.
-     * @param attribute The input attribute to use as the source value.
-     * @param input_mode The input mode.
-     * @param vertex The register that decides which vertex to read from (used in GS).
-     */
-    void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
-                                    const Tegra::Shader::IpaMode& input_mode,
-                                    std::optional<Register> vertex = {}) {
-        const std::string dest = GetRegisterAsFloat(reg);
-        const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
-        shader.AddLine(dest + " = " + src + ';');
-    }
+        code.AddLine("while (true) {");
+        ++code.scope;
 
-    std::string GetLocalMemoryAsFloat(const std::string& index) {
-        return "lmem[" + index + ']';
-    }
-
-    std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
-        const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"};
-        return func + "(lmem[" + index + "])";
-    }
+        code.AddLine("switch (jmp_to) {");
 
-    void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) {
-        shader.AddLine("lmem[" + index + "] = " + value + ';');
-    }
+        // Bind by reference so each basic block's node list is not copied per iteration.
+        for (const auto& [address, bb] : ir.GetBasicBlocks()) {
+            code.AddLine(fmt::format("case 0x{:x}u: {{", address));
+            ++code.scope;
 
-    void SetLocalMemoryAsInteger(const std::string& index, const std::string& value,
-                                 bool is_signed = false) {
-        const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
-        shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
-    }
+            VisitBlock(bb);
 
-    std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const {
-        switch (cc) {
-        case Tegra::Shader::ConditionCode::NEU:
-            return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
-            return "false";
+            --code.scope;
+            code.AddLine('}');
         }
-    }
 
-    std::string GetInternalFlag(const InternalFlag flag) const {
-        const auto index = static_cast<u32>(flag);
-        ASSERT(index < static_cast<u32>(InternalFlag::Amount));
+        code.AddLine("default: return;");
+        code.AddLine('}');
 
-        return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix;
+        for (std::size_t i = 0; i < 2; ++i) {
+            --code.scope;
+            code.AddLine('}');
+        }
     }
 
-    void SetInternalFlag(const InternalFlag flag, const std::string& value) const {
-        shader.AddLine(GetInternalFlag(flag) + " = " + value + ';');
+    std::string GetResult() {
+        return code.GetResult();
     }
 
-    /**
-     * Writes code that does a output attribute assignment to register operation. Output attributes
-     * are stored as floats, so this may require conversion.
-     * @param attribute The destination output attribute.
-     * @param elem The element to use for the operation.
-     * @param val_reg The register to use as the source value.
-     * @param buf_reg The register that tells which buffer to write to (used in geometry shaders).
-     */
-    void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg,
-                                      const Register& buf_reg) {
-        const std::string dest = GetOutputAttribute(attribute);
-        const std::string src = GetRegisterAsFloat(val_reg);
-        if (dest.empty())
-            return;
-
-        // Can happen with unknown/unimplemented output attributes, in which case we ignore the
-        // instruction for now.
-        if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
-            // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
-            // shader. These instructions use a dirty register as buffer index, to avoid some
-            // drivers from complaining about out of boundary writes, guard them.
-            const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
-                                        std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
-            shader.AddLine("amem[" + buf_index + "][" +
-                           std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
-                           " = " + src + ';');
-            return;
+    ShaderEntries GetShaderEntries() const {
+        ShaderEntries entries;
+        for (const auto& cbuf : ir.GetConstantBuffers()) {
+            entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
+                                               cbuf.first);
         }
-
-        switch (attribute) {
-        case Attribute::Index::ClipDistances0123:
-        case Attribute::Index::ClipDistances4567: {
-            const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem;
-            UNIMPLEMENTED_IF_MSG(
-                ((header.vtg.clip_distances >> index) & 1) == 0,
-                "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
-
-            clip_distances[index] = true;
-            fixed_pipeline_output_attributes_used.insert(attribute);
-            shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
-            break;
+        for (const auto& sampler : ir.GetSamplers()) {
+            entries.samplers.emplace_back(sampler);
         }
-        case Attribute::Index::PointSize:
-            fixed_pipeline_output_attributes_used.insert(attribute);
-            shader.AddLine(dest + " = " + src + ';');
-            break;
-        default:
-            shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
-            break;
+        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
+            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
         }
+        entries.clip_distances = ir.GetClipDistances();
+        entries.shader_length = ir.GetLength();
+        return entries;
     }
 
-    /// Generates code representing a uniform (C buffer) register, interpreted as the input type.
-    std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type,
-                           Register::Size size = Register::Size::Word) {
-        declr_const_buffers[index].MarkAsUsed(index, offset, stage);
-        std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" +
-                            std::to_string(offset % 4) + ']';
+private:
+    using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
+    using OperationDecompilersArray =
+        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
 
-        if (type == GLSLRegister::Type::Float) {
-            // Do nothing, default
-        } else if (type == GLSLRegister::Type::Integer) {
-            value = "floatBitsToInt(" + value + ')';
-        } else if (type == GLSLRegister::Type::UnsignedInteger) {
-            value = "floatBitsToUint(" + value + ')';
-        } else {
-            UNREACHABLE();
-        }
+    void DeclareVertex() {
+        if (stage != ShaderStage::Vertex)
+            return;
 
-        return ConvertIntegerSize(value, size);
+        DeclareVertexRedeclarations();
     }
 
-    std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str,
-                                   GLSLRegister::Type type) {
-        declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
-
-        const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
-        const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
-                                  final_offset + " % 4]";
-
-        if (type == GLSLRegister::Type::Float) {
-            return value;
-        } else if (type == GLSLRegister::Type::Integer) {
-            return "floatBitsToInt(" + value + ')';
-        } else {
-            UNREACHABLE();
-            return value;
-        }
-    }
-
-    /// Add declarations.
-    void GenerateDeclarations(const std::string& suffix) {
-        GenerateVertex();
-        GenerateRegisters(suffix);
-        GenerateLocalMemory();
-        GenerateInternalFlags();
-        GenerateInputAttrs();
-        GenerateOutputAttrs();
-        GenerateConstBuffers();
-        GenerateSamplers();
-        GenerateGeometry();
-    }
+    void DeclareGeometry() {
+        if (stage != ShaderStage::Geometry)
+            return;
 
-    /// Returns a list of constant buffer declarations.
-    std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const {
-        std::vector<ConstBufferEntry> result;
-        std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(),
-                     std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); });
-        return result;
-    }
+        const auto topology = GetTopologyName(header.common3.output_topology);
+        const auto max_vertices = std::to_string(header.common4.max_output_vertices);
+        code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;");
+        code.AddNewLine();
 
-    /// Returns a list of samplers used in the shader.
-    const std::vector<SamplerEntry>& GetSamplers() const {
-        return used_samplers;
+        DeclareVertexRedeclarations();
     }
 
-    /// Returns an array of the used clip distances.
-    const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const {
-        return clip_distances;
-    }
+    void DeclareVertexRedeclarations() {
+        bool clip_distances_declared = false;
 
-    /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
-    /// necessary.
-    std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
-                              bool is_array, bool is_shadow) {
-        const auto offset = static_cast<std::size_t>(sampler.index.Value());
+        code.AddLine("out gl_PerVertex {");
+        ++code.scope;
 
-        // If this sampler has already been used, return the existing mapping.
-        const auto itr =
-            std::find_if(used_samplers.begin(), used_samplers.end(),
-                         [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
+        code.AddLine("vec4 gl_Position;");
 
-        if (itr != used_samplers.end()) {
-            ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
-                   itr->IsShadow() == is_shadow);
-            return itr->GetName();
+        for (const auto o : ir.GetOutputAttributes()) {
+            if (o == Attribute::Index::PointSize)
+                code.AddLine("float gl_PointSize;");
+            if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
+                                             o == Attribute::Index::ClipDistances4567)) {
+                code.AddLine("float gl_ClipDistance[];");
+                clip_distances_declared = true;
+            }
         }
 
-        // Otherwise create a new mapping for this sampler
-        const std::size_t next_index = used_samplers.size();
-        const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow};
-        used_samplers.emplace_back(entry);
-        return entry.GetName();
+        --code.scope;
+        code.AddLine("};");
+        code.AddNewLine();
     }
 
-    void SetLocalMemory(u64 lmem) {
-        local_memory_size = lmem;
+    /// Declares every general purpose register used by the IR as a zero-initialized float.
+    void DeclareRegisters() {
+        const auto& gprs = ir.GetRegisters();
+        for (const u32 index : gprs)
+            code.AddLine("float " + GetRegister(index) + " = 0;");
+        if (!gprs.empty())
+            code.AddNewLine();
     }
 
-private:
-    /// Generates declarations for registers.
-    void GenerateRegisters(const std::string& suffix) {
-        for (const auto& reg : regs) {
-            declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
-                                 std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
+    void DeclarePredicates() {
+        const auto& predicates = ir.GetPredicates();
+        for (const auto pred : predicates) {
+            code.AddLine("bool " + GetPredicate(pred) + " = false;");
         }
-        declarations.AddNewLine();
+        if (!predicates.empty())
+            code.AddNewLine();
     }
 
-    /// Generates declarations for local memory.
-    void GenerateLocalMemory() {
-        if (local_memory_size > 0) {
-            declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) +
-                                 "];");
-            declarations.AddNewLine();
+    void DeclareLocalMemory() {
+        if (const u64 lmem_size = header.GetLocalMemorySize(); lmem_size > 0) {
+            const std::string count = std::to_string(Common::AlignUp(lmem_size, 4) / 4);
+            code.AddLine("float " + GetLocalMemory() + '[' + count + "];");
+            code.AddNewLine();
         }
     }
 
-    /// Generates declarations for internal flags.
-    void GenerateInternalFlags() {
+    void DeclareInternalFlags() {
         for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
-            const InternalFlag code = static_cast<InternalFlag>(flag);
-            declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
+            const InternalFlag flag_code = static_cast<InternalFlag>(flag);
+            code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;");
         }
-        declarations.AddNewLine();
+        code.AddNewLine();
     }
 
-    /// Generates declarations for input attributes.
-    void GenerateInputAttrs() {
-        for (const auto element : declr_input_attribute) {
+    /// Maps a fragment input's interpolation use to its GLSL qualifier (with trailing space).
+    /// Perspective is GLSL's default (smooth) interpolation, so it needs no qualifier; any
+    /// other value is logged, flagged as unreachable and also produces an empty string.
+    std::string GetInputFlags(AttributeUse attribute) {
+        switch (attribute) {
+        case AttributeUse::Constant:
+            return "flat ";
+        case AttributeUse::ScreenLinear:
+            return "noperspective ";
+        case AttributeUse::Perspective:
+            // Default, Smooth
+            return {};
+        default:
+            break;
+        }
+        LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
+        UNREACHABLE();
+        return {};
+    }
+
+    void DeclareInputAttributes() {
+        const auto& attributes = ir.GetInputAttributes();
+        for (const auto& element : attributes) {
+            const Attribute::Index index = element.first;
+            if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
+                // Skip when it's not a generic attribute
+                continue;
+            }
+
             // TODO(bunnei): Use proper number of elements for these
-            u32 idx =
-                static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
-            if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
+            u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
+            if (stage != ShaderStage::Vertex) {
                 // If inputs are varyings, add an offset
                 idx += GENERIC_VARYING_START_LOCATION;
             }
 
-            std::string attr{GetInputAttribute(element.first, element.second)};
-            if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
+            std::string attr = GetInputAttribute(index);
+            if (stage == ShaderStage::Geometry) {
                 attr = "gs_" + attr + "[]";
             }
-            declarations.AddLine("layout (location = " + std::to_string(idx) + ") " +
-                                 GetInputFlags(element.first) + "in vec4 " + attr + ';');
+            // Only fragment shader inputs get an interpolation qualifier.
+            std::string flags;
+            if (stage == ShaderStage::Fragment) {
+                flags = GetInputFlags(
+                    header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION));
+            }
+            code.AddLine("layout (location = " + std::to_string(idx) + ") " + flags + "in vec4 " +
+                         attr + ';');
         }
-
-        declarations.AddNewLine();
+        if (!attributes.empty())
+            code.AddNewLine();
     }
 
-    /// Generates declarations for output attributes.
-    void GenerateOutputAttrs() {
-        for (const auto& index : declr_output_attribute) {
+    void DeclareOutputAttributes() {
+        const auto& attributes = ir.GetOutputAttributes();
+        for (const auto index : attributes) {
+            if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
+                // Skip when it's not a generic attribute
+                continue;
+            }
             // TODO(bunnei): Use proper number of elements for these
-            const u32 idx = static_cast<u32>(index) -
-                            static_cast<u32>(Attribute::Index::Attribute_0) +
-                            GENERIC_VARYING_START_LOCATION;
-            declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
-                                 GetOutputAttribute(index) + ';');
-        }
-        declarations.AddNewLine();
-    }
-
-    /// Generates declarations for constant buffers.
-    void GenerateConstBuffers() {
-        for (const auto& entry : GetConstBuffersDeclarations()) {
-            declarations.AddLine("layout (std140) uniform " + entry.GetName());
-            declarations.AddLine('{');
-            declarations.AddLine("    vec4 c" + std::to_string(entry.GetIndex()) +
-                                 "[MAX_CONSTBUFFER_ELEMENTS];");
-            declarations.AddLine("};");
-            declarations.AddNewLine();
+            const auto idx = static_cast<u32>(index) -
+                             static_cast<u32>(Attribute::Index::Attribute_0) +
+                             GENERIC_VARYING_START_LOCATION;
+            code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
+                         GetOutputAttribute(index) + ';');
         }
-        declarations.AddNewLine();
+        if (!attributes.empty())
+            code.AddNewLine();
     }
 
-    /// Generates declarations for samplers.
-    void GenerateSamplers() {
-        const auto& samplers = GetSamplers();
-        for (const auto& sampler : samplers) {
-            declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
-                                 ';');
+    void DeclareConstantBuffers() {
+        // Only the key (buffer index) is needed here; avoid copying the mapped entry.
+        for (const auto& [index, cbuf] : ir.GetConstantBuffers()) {
+            code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
+                         ") uniform " + GetConstBufferBlock(index) + " {");
+            code.AddLine("    vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
+            code.AddLine("};");
+            code.AddNewLine();
         }
-        declarations.AddNewLine();
     }
 
-    /// Generates declarations used for geometry shaders.
-    void GenerateGeometry() {
-        if (stage != Maxwell3D::Regs::ShaderStage::Geometry)
-            return;
-
-        declarations.AddLine(
-            "layout (" + GetTopologyName(header.common3.output_topology) +
-            ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;");
-        declarations.AddNewLine();
-
-        declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" +
-                             std::to_string(MAX_ATTRIBUTES) + "];");
-        declarations.AddNewLine();
-
-        constexpr char buffer[] = "amem[output_buffer]";
-        declarations.AddLine("void emit_vertex(uint output_buffer) {");
-        ++declarations.scope;
-        for (const auto element : declr_output_attribute) {
-            declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' +
-                                 std::to_string(static_cast<u32>(element)) + "];");
+    void DeclareGlobalMemory() {
+        for (const auto& entry : ir.GetGlobalMemoryBases()) {
+            const std::string binding =
+                fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
+            code.AddLine("layout (std430, binding = " + binding + ") buffer " +
+                         GetGlobalMemoryBlock(entry) + " {");
+            code.AddLine("    float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+            code.AddLine("};");
+            code.AddNewLine();
         }
-
-        declarations.AddLine("position = " + std::string(buffer) + '[' +
-                             std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];");
-
-        // If a geometry shader is attached, it will always flip (it's the last stage before
-        // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
-        declarations.AddLine("position.xy *= viewport_flip.xy;");
-        declarations.AddLine("gl_Position = position;");
-        declarations.AddLine("position.w = 1.0;");
-        declarations.AddLine("EmitVertex();");
-        --declarations.scope;
-        declarations.AddLine('}');
-        declarations.AddNewLine();
     }
 
-    void GenerateVertex() {
-        if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
-            return;
-        bool clip_distances_declared = false;
+    void DeclareSamplers() {
+        const auto& samplers = ir.GetSamplers();
+        for (const auto& sampler : samplers) {
+            std::string sampler_type = [&]() {
+                switch (sampler.GetType()) {
+                case Tegra::Shader::TextureType::Texture1D:
+                    return "sampler1D";
+                case Tegra::Shader::TextureType::Texture2D:
+                    return "sampler2D";
+                case Tegra::Shader::TextureType::Texture3D:
+                    return "sampler3D";
+                case Tegra::Shader::TextureType::TextureCube:
+                    return "samplerCube";
+                default:
+                    UNREACHABLE();
+                    return "sampler2D";
+                }
+            }();
+            if (sampler.IsArray())
+                sampler_type += "Array";
+            if (sampler.IsShadow())
+                sampler_type += "Shadow";
 
-        declarations.AddLine("out gl_PerVertex {");
-        ++declarations.scope;
-        declarations.AddLine("vec4 gl_Position;");
-        for (auto& o : fixed_pipeline_output_attributes_used) {
-            if (o == Attribute::Index::PointSize)
-                declarations.AddLine("float gl_PointSize;");
-            if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
-                                             o == Attribute::Index::ClipDistances4567)) {
-                declarations.AddLine("float gl_ClipDistance[];");
-                clip_distances_declared = true;
-            }
+            code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
+                         ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
         }
-        --declarations.scope;
-        declarations.AddLine("};");
+        if (!samplers.empty())
+            code.AddNewLine();
     }
 
-    /// Generates code representing a temporary (GPR) register.
-    std::string GetRegister(const Register& reg, unsigned elem) {
-        if (reg == Register::ZeroIndex) {
-            return "0";
-        }
-
-        return regs[reg.GetSwizzledIndex(elem)].GetString();
-    }
-
-    /**
-     * Writes code that does a register assignment to value operation.
-     * @param reg The destination register to use.
-     * @param elem The element to use for the operation.
-     * @param value The code representing the value to assign.
-     * @param dest_num_components Number of components in the destination.
-     * @param value_num_components Number of components in the value.
-     * @param dest_elem Optional, the destination element to use for the operation.
-     */
-    void SetRegister(const Register& reg, u64 elem, const std::string& value,
-                     u64 dest_num_components, u64 value_num_components, u64 dest_elem,
-                     bool precise) {
-        if (reg == Register::ZeroIndex) {
-            // Setting RZ is a nop in hardware.
-            return;
-        }
-
-        std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
-        if (dest_num_components > 1) {
-            dest += GetSwizzle(elem);
-        }
-
-        std::string src = '(' + value + ')';
-        if (value_num_components > 1) {
-            src += GetSwizzle(elem);
-        }
-
-        if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
-            const auto scope = shader.Scope();
-
-            // This avoids optimizations of constant propagation and keeps the code as the original
-            // Sadly using the precise keyword causes "linking" errors on fragment shaders.
-            shader.AddLine("precise float tmp = " + src + ';');
-            shader.AddLine(dest + " = tmp;");
-        } else {
-            shader.AddLine(dest + " = " + src + ';');
+    void VisitBlock(const NodeBlock& bb) { // Visits every node of bb in iteration order
+        for (const Node node : bb) {
+            if (const std::string expr = Visit(node); !expr.empty()) {
+                code.AddLine(expr); // Only non-empty visit results are written as lines
+            }
         }
     }
 
-    /// Build the GLSL register list.
-    void BuildRegisterList() {
-        regs.reserve(Register::NumRegisters);
-
-        for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
-            regs.emplace_back(index, suffix);
-        }
-    }
+    std::string Visit(Node node) { // Returns the code string generated for node
+        if (const auto operation = std::get_if<OperationNode>(node)) {
+            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+            if (operation_index >= operation_decompilers.size()) {
+                UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
+                return {};
+            }
+            const auto decompiler = operation_decompilers[operation_index];
+            if (decompiler == nullptr) {
+                UNREACHABLE_MSG("Undefined operation: {}", operation_index);
+                return {};
+            }
+            return (this->*decompiler)(*operation); // Dispatch through the handler table
 
-    void BuildInputList() {
-        const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) -
-                         static_cast<u32>(Attribute::Index::Attribute_0) + 1;
-        declr_input_attribute.reserve(size);
-    }
+        } else if (const auto gpr = std::get_if<GprNode>(node)) {
+            const u32 index = gpr->GetIndex();
+            if (index == Register::ZeroIndex) {
+                return "0"; // The zero register always reads as the literal 0
+            }
+            return GetRegister(index);
 
-    /// Generates code representing an input attribute register.
-    std::string GetInputAttribute(Attribute::Index attribute,
-                                  const Tegra::Shader::IpaMode& input_mode,
-                                  std::optional<Register> vertex = {}) {
-        auto GeometryPass = [&](const std::string& name) {
-            if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
-                // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set
-                // an 0x80000000 index for those and the shader fails to build. Find out why this
-                // happens and what's its intent.
-                return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) +
-                       " % MAX_VERTEX_INPUT]";
+        } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+            const u32 value = immediate->GetValue();
+            if (value < 10) {
+                // For eyecandy avoid using hex numbers on single digits
+                return fmt::format("utof({}u)", immediate->GetValue());
             }
-            return name;
-        };
+            return fmt::format("utof(0x{:x}u)", immediate->GetValue());
 
-        switch (attribute) {
-        case Attribute::Index::Position:
-            if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
-                return GeometryPass("position");
-            } else {
-                return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)";
+        } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+            const auto value = [&]() -> std::string {
+                switch (const auto index = predicate->GetIndex(); index) {
+                case Tegra::Shader::Pred::UnusedIndex:
+                    return "true"; // The unused predicate index is emitted as constant true
+                case Tegra::Shader::Pred::NeverExecute:
+                    return "false";
+                default:
+                    return GetPredicate(index);
+                }
+            }();
+            if (predicate->IsNegated()) {
+                return "!(" + value + ')';
             }
-        case Attribute::Index::PointCoord:
-            return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)";
-        case Attribute::Index::TessCoordInstanceIDVertexID:
-            // TODO(Subv): Find out what the values are for the first two elements when inside a
-            // vertex shader, and what's the value of the fourth element when inside a Tess Eval
-            // shader.
-            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
-            // Config pack's first value is instance_id.
-            return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))";
-        case Attribute::Index::FrontFacing:
-            // TODO(Subv): Find out what the values are for the other elements.
-            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
-            return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))";
-        default:
-            const u32 index{static_cast<u32>(attribute) -
-                            static_cast<u32>(Attribute::Index::Attribute_0)};
-            if (attribute >= Attribute::Index::Attribute_0 &&
-                attribute <= Attribute::Index::Attribute_31) {
-                if (declr_input_attribute.count(attribute) == 0) {
-                    declr_input_attribute[attribute] = input_mode;
+            return value;
+
+        } else if (const auto abuf = std::get_if<AbufNode>(node)) {
+            const auto attribute = abuf->GetIndex();
+            const auto element = abuf->GetElement();
+
+            const auto GeometryPass = [&](const std::string& name) {
+                if (stage == ShaderStage::Geometry && abuf->GetBuffer()) {
+                    // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
+                    // set an 0x80000000 index for those and the shader fails to build. Find out why
+                    // this happens and what's its intent.
+                    return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) +
+                           ") % MAX_VERTEX_INPUT]";
+                }
+                return name;
+            };
+
+            switch (attribute) {
+            case Attribute::Index::Position:
+                if (stage != ShaderStage::Fragment) {
+                    return GeometryPass("position") + GetSwizzle(element);
                 } else {
-                    UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode,
-                                         "Multiple input modes for the same attribute");
+                    return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
+                }
+            case Attribute::Index::PointCoord:
+                switch (element) {
+                case 0:
+                    return "gl_PointCoord.x";
+                case 1:
+                    return "gl_PointCoord.y";
+                case 2:
+                case 3:
+                    return "0";
                 }
-                return GeometryPass("input_attribute_" + std::to_string(index));
+                UNREACHABLE();
+                return "0";
+            case Attribute::Index::TessCoordInstanceIDVertexID:
+                // TODO(Subv): Find out what the values are for the first two elements when inside a
+                // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+                // shader.
+                ASSERT(stage == ShaderStage::Vertex);
+                switch (element) {
+                case 2:
+                    // Config pack's first value is instance_id.
+                    return "uintBitsToFloat(config_pack[0])";
+                case 3:
+                    return "uintBitsToFloat(gl_VertexID)";
+                }
+                UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+                return "0";
+            case Attribute::Index::FrontFacing:
+                // TODO(Subv): Find out what the values are for the other elements.
+                ASSERT(stage == ShaderStage::Fragment);
+                switch (element) {
+                case 3:
+                    return "itof(gl_FrontFacing ? -1 : 0)";
+                }
+                UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+                return "0";
+            default:
+                if (attribute >= Attribute::Index::Attribute_0 &&
+                    attribute <= Attribute::Index::Attribute_31) {
+                    return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+                }
+                break; // Falls out of the if-chain to the final UNREACHABLE() below
             }
-
             UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
-        }
 
-        return "vec4(0, 0, 0, 0)";
-    }
-
-    std::string GetInputFlags(const Attribute::Index attribute) {
-        const Tegra::Shader::IpaSampleMode sample_mode =
-            declr_input_attribute[attribute].sampling_mode;
-        const Tegra::Shader::IpaInterpMode interp_mode =
-            declr_input_attribute[attribute].interpolation_mode;
-        std::string out;
-        switch (interp_mode) {
-        case Tegra::Shader::IpaInterpMode::Flat: {
-            out += "flat ";
-            break;
-        }
-        case Tegra::Shader::IpaInterpMode::Linear: {
-            out += "noperspective ";
-            break;
-        }
-        case Tegra::Shader::IpaInterpMode::Perspective: {
-            // Default, Smooth
-            break;
-        }
-        default: {
-            UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
-        }
-        }
-        switch (sample_mode) {
-        case Tegra::Shader::IpaSampleMode::Centroid:
-            // It can be implemented with the "centroid " keyword in glsl
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
-            break;
-        case Tegra::Shader::IpaSampleMode::Default:
-            // Default, n/a
-            break;
-        default: {
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
-            break;
-        }
-        }
-        return out;
-    }
+        } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
+            const Node offset = cbuf->GetOffset();
+            if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+                // Direct access: first index is offset / 16, second is (offset / 4) % 4
+                const u32 offset_imm = immediate->GetValue();
+                ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
+                return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+                                   offset_imm / (4 * 4), (offset_imm / 4) % 4);
+
+            } else if (std::holds_alternative<OperationNode>(*offset)) {
+                // Indirect access: the offset is stored in a generated uint temporary first
+                const std::string final_offset = code.GenerateTemporary();
+                code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
+                return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
+                                   final_offset, final_offset);
 
-    /// Generates code representing the declaration name of an output attribute register.
-    std::string GetOutputAttribute(Attribute::Index attribute) {
-        switch (attribute) {
-        case Attribute::Index::PointSize:
-            return "gl_PointSize";
-        case Attribute::Index::Position:
-            return "position";
-        case Attribute::Index::ClipDistances0123:
-        case Attribute::Index::ClipDistances4567: {
-            return "gl_ClipDistance";
-        }
-        default:
-            const u32 index{static_cast<u32>(attribute) -
-                            static_cast<u32>(Attribute::Index::Attribute_0)};
-            if (attribute >= Attribute::Index::Attribute_0) {
-                declr_output_attribute.insert(attribute);
-                return "output_attribute_" + std::to_string(index);
+            } else {
+                UNREACHABLE_MSG("Unmanaged offset node type");
             }
 
-            UNIMPLEMENTED_MSG("Unhandled output attribute={}", index);
-            return {};
-        }
-    }
-
-    ShaderWriter& shader;
-    ShaderWriter& declarations;
-    std::vector<GLSLRegister> regs;
-    std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute;
-    std::set<Attribute::Index> declr_output_attribute;
-    std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
-    std::vector<SamplerEntry> used_samplers;
-    const Maxwell3D::Regs::ShaderStage& stage;
-    const std::string& suffix;
-    const Tegra::Shader::Header& header;
-    std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
-    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
-    u64 local_memory_size;
-};
-
-class GLSLGenerator {
-public:
-    GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
-                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix,
-                  std::size_t shader_length)
-        : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
-          stage(stage), suffix(suffix), shader_length(shader_length) {
-        std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-        local_memory_size = header.GetLocalMemorySize();
-        regs.SetLocalMemory(local_memory_size);
-        Generate(suffix);
-    }
+        } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+            const std::string real = Visit(gmem->GetRealAddress());
+            const std::string base = Visit(gmem->GetBaseAddress());
+            const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+            return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
 
-    std::string GetShaderCode() {
-        return declarations.GetResult() + shader.GetResult();
-    }
+        } else if (const auto lmem = std::get_if<LmemNode>(node)) {
+            return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
 
-    /// Returns entries in the shader that are useful for external functions
-    ShaderEntries GetEntries() const {
-        return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(),
-                shader_length};
-    }
+        } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
+            return GetInternalFlag(internal_flag->GetFlag());
 
-private:
-    /// Gets the Subroutine object corresponding to the specified address.
-    const Subroutine& GetSubroutine(u32 begin, u32 end) const {
-        const auto iter = subroutines.find(Subroutine{begin, end, suffix});
-        ASSERT(iter != subroutines.end());
-        return *iter;
-    }
+        } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+            // It's invalid to call conditional on nested nodes, use an operation instead
+            code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
+            ++code.scope;
 
-    /// Generates code representing a 19-bit immediate value
-    static std::string GetImmediate19(const Instruction& instr) {
-        return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
-    }
+            VisitBlock(conditional->GetCode());
 
-    /// Generates code representing a 32-bit immediate value
-    static std::string GetImmediate32(const Instruction& instr) {
-        return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
-    }
+            --code.scope;
+            code.AddLine('}');
+            return {}; // The if-block lines were already emitted, so no expression is returned
 
-    /// Generates code representing a vec2 pair unpacked from a half float immediate
-    static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) {
-        const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates()));
-        if (!negate) {
-            return immediate;
+        } else if (const auto comment = std::get_if<CommentNode>(node)) {
+            return "// " + comment->GetText();
         }
-        const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : "";
-        const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : "";
-        const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)";
-
-        return '(' + immediate + " * " + negate_vec + ')';
+        UNREACHABLE(); // Reached whenever no branch above returned
+        return {};
     }
 
-    /// Generates code representing a texture sampler.
-    std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array,
-                           bool is_shadow) {
-        return regs.AccessSampler(sampler, type, is_array, is_shadow);
-    }
-
-    /**
-     * Adds code that calls a subroutine.
-     * @param subroutine the subroutine to call.
-     */
-    void CallSubroutine(const Subroutine& subroutine) {
-        if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
-            shader.AddLine(subroutine.GetName() + "();");
-            shader.AddLine("return true;");
-        } else if (subroutine.exit_method == ExitMethod::Conditional) {
-            shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
-        } else {
-            shader.AddLine(subroutine.GetName() + "();");
+    std::string ApplyPrecise(Operation operation, const std::string& value) {
+        if (!IsPrecise(operation)) {
+            return value; // Non-precise operations pass through unchanged
         }
-    }
+        // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
+        const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
 
-    /*
-     * Writes code that assigns a predicate boolean variable.
-     * @param pred The id of the predicate to write to.
-     * @param value The expression value to assign to the predicate.
-     */
-    void SetPredicate(u64 pred, const std::string& value) {
-        using Tegra::Shader::Pred;
-        // Can't assign to the constant predicate.
-        ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
-
-        std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
-        shader.AddLine(variable + " = " + value + ';');
-        declr_predicates.insert(std::move(variable));
-    }
-
-    /*
-     * Returns the condition to use in the 'if' for a predicated instruction.
-     * @param instr Instruction to generate the if condition for.
-     * @returns string containing the predicate condition.
-     */
-    std::string GetPredicateCondition(u64 index, bool negate) {
-        using Tegra::Shader::Pred;
-        std::string variable;
-
-        // Index 7 is used as an 'Always True' condition.
-        if (index == static_cast<u64>(Pred::UnusedIndex)) {
-            variable = "true";
-        } else {
-            variable = 'p' + std::to_string(index) + '_' + suffix;
-            declr_predicates.insert(variable);
-        }
-        if (negate) {
-            return "!(" + variable + ')';
-        }
+        const std::string temporary = code.GenerateTemporary();
+        code.AddLine(precise + "float " + temporary + " = " + value + ';');
+        return temporary; // Callers get the temporary's name instead of the expression
+    }
 
-        return variable;
-    }
-
-    /**
-     * Returns the comparison string to use to compare two values in the 'set' family of
-     * instructions.
-     * @param condition The condition used in the 'set'-family instruction.
-     * @param op_a First operand to use for the comparison.
-     * @param op_b Second operand to use for the comparison.
-     * @returns String corresponding to the GLSL operator that matches the desired comparison.
-     */
-    std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
-                                       const std::string& op_a, const std::string& op_b) const {
-        using Tegra::Shader::PredCondition;
-        static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
-            {PredCondition::LessThan, "<"},
-            {PredCondition::Equal, "=="},
-            {PredCondition::LessEqual, "<="},
-            {PredCondition::GreaterThan, ">"},
-            {PredCondition::NotEqual, "!="},
-            {PredCondition::GreaterEqual, ">="},
-            {PredCondition::LessThanWithNan, "<"},
-            {PredCondition::NotEqualWithNan, "!="},
-            {PredCondition::LessEqualWithNan, "<="},
-            {PredCondition::GreaterThanWithNan, ">"},
-            {PredCondition::GreaterEqualWithNan, ">="}};
-
-        const auto& comparison{PredicateComparisonStrings.find(condition)};
-        UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(),
-                             "Unknown predicate comparison operation");
-
-        std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
-        if (condition == PredCondition::LessThanWithNan ||
-            condition == PredCondition::NotEqualWithNan ||
-            condition == PredCondition::LessEqualWithNan ||
-            condition == PredCondition::GreaterThanWithNan ||
-            condition == PredCondition::GreaterEqualWithNan) {
-            predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
+    std::string VisitOperand(Operation operation, std::size_t operand_index) {
+        const auto& operand = operation[operand_index];
+        const bool parent_precise = IsPrecise(operation);
+        const bool child_precise = IsPrecise(operand);
+        const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
+        if (!parent_precise || child_precise || child_trivial) {
+            return Visit(operand); // No temporary needed, use the operand expression inline
         }
 
-        return predicate;
-    }
-
-    /**
-     * Returns the operator string to use to combine two predicates in the 'setp' family of
-     * instructions.
-     * @params operation The operator used in the 'setp'-family instruction.
-     * @returns String corresponding to the GLSL operator that matches the desired operator.
-     */
-    std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const {
-        using Tegra::Shader::PredOperation;
-        static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = {
-            {PredOperation::And, "&&"},
-            {PredOperation::Or, "||"},
-            {PredOperation::Xor, "^^"},
-        };
-
-        auto op = PredicateOperationStrings.find(operation);
-        UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation");
-        return op->second;
-    }
-
-    /**
-     * Transforms the input string GLSL operand into one that applies the abs() function and negates
-     * the output if necessary. When both abs and neg are true, the negation will be applied after
-     * taking the absolute value.
-     * @param operand The input operand to take the abs() of, negate, or both.
-     * @param abs Whether to apply the abs() function to the input operand.
-     * @param neg Whether to negate the input operand.
-     * @returns String corresponding to the operand after being transformed by the abs() and
-     * negation operations.
-     */
-    static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) {
-        std::string result = operand;
-
-        if (abs) {
-            result = "abs(" + result + ')';
-        }
+        const std::string temporary = code.GenerateTemporary();
+        code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+        return temporary; // Non-precise operation under a precise parent goes through a temp
+    }
 
-        if (neg) {
-            result = "-(" + result + ')';
-        }
+    std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
+        std::string value = VisitOperand(operation, operand_index); // Uncast operand code
+        switch (type) {
+        case Type::HalfFloat: {
+            const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
+            if (!half_meta) { // get_if returned null: there are no per-operand half types
+                return "toHalf2(" + value + ')'; // Return here, half_meta is dereferenced below
+            }
+
-        return result;
-    }
-
-    /*
-     * Transforms the input string GLSL operand into an unpacked half float pair.
-     * @note This function returns a float type pair instead of a half float pair. This is because
-     * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is.
-     * @param operand Input operand. It has to be an unsigned integer.
-     * @param type How to unpack the unsigned integer to a half float pair.
-     * @param abs Get the absolute value of unpacked half floats.
-     * @param neg Get the negative value of unpacked half floats.
-     * @returns String corresponding to a half float pair.
-     */
-    static std::string GetHalfFloat(const std::string& operand,
-                                    Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1,
-                                    bool abs = false, bool neg = false) {
-        // "vec2" calls emitted in this function are intended to alias components.
-        const std::string value = [&]() {
-            switch (type) {
+            switch (half_meta->types.at(operand_index)) { // Unpack mode of this operand
             case Tegra::Shader::HalfType::H0_H1:
-                return "unpackHalf2x16(" + operand + ')';
+                return "toHalf2(" + value + ')';
             case Tegra::Shader::HalfType::F32:
-                return "vec2(uintBitsToFloat(" + operand + "))";
+                return "vec2(" + value + ')';
             case Tegra::Shader::HalfType::H0_H0:
-            case Tegra::Shader::HalfType::H1_H1: {
-                const bool high = type == Tegra::Shader::HalfType::H1_H1;
-                const char unpack_index = "xy"[high ? 1 : 0];
-                return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')';
-            }
-            default:
-                UNREACHABLE();
-                return std::string("vec2(0)");
+                return "vec2(toHalf2(" + value + ")[0])";
+            case Tegra::Shader::HalfType::H1_H1:
+                return "vec2(toHalf2(" + value + ")[1])";
             }
-        }();
-
-        return GetOperandAbsNeg(value, abs, neg);
-    }
-
-    /*
-     * Returns whether the instruction at the specified offset is a 'sched' instruction.
-     * Sched instructions always appear before a sequence of 3 instructions.
-     */
-    bool IsSchedInstruction(u32 offset) const {
-        // sched instructions appear once every 4 instructions.
-        static constexpr std::size_t SchedPeriod = 4;
-        u32 absolute_offset = offset - main_offset;
-
-        return (absolute_offset % SchedPeriod) == 0;
-    }
-
-    void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
-                             const std::string& op_b,
-                             Tegra::Shader::PredicateResultMode predicate_mode,
-                             Tegra::Shader::Pred predicate, const bool set_cc) {
-        std::string result{};
-        switch (logic_op) {
-        case LogicOperation::And: {
-            result = '(' + op_a + " & " + op_b + ')';
-            break;
-        }
-        case LogicOperation::Or: {
-            result = '(' + op_a + " | " + op_b + ')';
-            break;
-        }
-        case LogicOperation::Xor: {
-            result = '(' + op_a + " ^ " + op_b + ')';
-            break;
-        }
-        case LogicOperation::PassB: {
-            result = op_b;
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
-        }
-
-        if (dest != Tegra::Shader::Register::ZeroIndex) {
-            regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
         }
-
-        using Tegra::Shader::PredicateResultMode;
-        // Write the predicate value depending on the predicate mode.
-        switch (predicate_mode) {
-        case PredicateResultMode::None:
-            // Do nothing.
-            return;
-        case PredicateResultMode::NotZero:
-            // Set the predicate to true if the result is not zero.
-            SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
-            break;
         default:
-            UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
-                              static_cast<u32>(predicate_mode));
+            return CastOperand(value, type); // Non-half types are delegated to CastOperand
         }
     }
 
-    void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b,
-                              const std::string& op_c, const std::string& imm_lut,
-                              const bool set_cc) {
-        if (dest == Tegra::Shader::Register::ZeroIndex) {
-            return;
-        }
-
-        static constexpr std::array<const char*, 32> shift_amounts = {
-            "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8",  "9",  "10",
-            "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
-            "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"};
-
-        std::string result;
-        result += '(';
-
-        for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
-            if (i)
-                result += '|';
-            result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
-                      ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a +
-                      " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")";
+    std::string CastOperand(const std::string& value, Type type) const {
+        switch (type) {
+        case Type::Bool:
+        case Type::Bool2:
+        case Type::Float:
+            return value; // Bool, Bool2 and Float operands are returned as-is
+        case Type::Int:
+            return "ftoi(" + value + ')';
+        case Type::Uint:
+            return "ftou(" + value + ')';
+        case Type::HalfFloat:
+            // Can't be handled as a stand-alone value
+            UNREACHABLE();
+            return value;
         }
-
-        result += ')';
-
-        regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
+        UNREACHABLE(); // Only reached when type matches none of the cases above
+        return value;
     }
 
-    void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) {
-        // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
-        // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
-
-        std::size_t written_components = 0;
-        for (u32 component = 0; component < 4; ++component) {
-            if (!instr.texs.IsComponentEnabled(component)) {
-                continue;
-            }
-
-            if (written_components < 2) {
-                // Write the first two swizzle components to gpr0 and gpr0+1
-                regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false,
-                                        written_components % 2);
-            } else {
-                ASSERT(instr.texs.HasTwoDestinations());
-                // Write the rest of the swizzle components to gpr28 and gpr28+1
-                regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false,
-                                        written_components % 2);
+    std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
+        switch (type) {
+        case Type::Bool:
+        case Type::Bool2:
+        case Type::Float:
+            if (needs_parenthesis) { // Only the uncast path needs explicit parentheses
+                return '(' + value + ')';
             }
-
-            ++written_components;
-        }
-    }
-
-    void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) {
-        // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
-        // float instruction).
-
-        std::array<std::string, 4> components;
-        u32 written_components = 0;
-
-        for (u32 component = 0; component < 4; ++component) {
-            if (!instr.texs.IsComponentEnabled(component))
-                continue;
-            components[written_components++] = texture + GetSwizzle(component);
-        }
-        if (written_components == 0)
-            return;
-
-        const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) {
-            return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
-        };
-
-        regs.SetRegisterToHalfFloat(
-            instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
-            Tegra::Shader::HalfMerge::H0_H1, 1, 1);
-
-        if (written_components > 2) {
-            ASSERT(instr.texs.HasTwoDestinations());
-            regs.SetRegisterToHalfFloat(
-                instr.gpr28, 0,
-                BuildComponent(components[2], components[3], written_components > 3),
-                Tegra::Shader::HalfMerge::H0_H1, 1, 1);
-        }
+            return value;
+        case Type::Int:
+            return "itof(" + value + ')'; // The cast call itself wraps the value
+        case Type::Uint:
+            return "utof(" + value + ')';
+        case Type::HalfFloat:
+            return "fromHalf2(" + value + ')';
+        }
+        UNREACHABLE(); // Only reached when type matches none of the cases above
+        return value;
     }
 
-    static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
-        switch (texture_type) {
-        case Tegra::Shader::TextureType::Texture1D:
-            return 1;
-        case Tegra::Shader::TextureType::Texture2D:
-            return 2;
-        case Tegra::Shader::TextureType::Texture3D:
-        case Tegra::Shader::TextureType::TextureCube:
-            return 3;
-        default:
-            UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
-            return 0;
-        }
+    std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
+                              Type type_a, bool needs_parenthesis = true) {
+        return ApplyPrecise(operation,
+                            BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')',
+                                              result_type, needs_parenthesis));
     }
 
-    /*
-     * Emits code to push the input target address to the flow address stack, incrementing the stack
-     * top.
-     */
-    void EmitPushToFlowStack(u32 target) {
-        const auto scope = shader.Scope();
+    std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
+                                    Type type_a, Type type_b) {
+        const std::string op_a = VisitOperand(operation, 0, type_a);
+        const std::string op_b = VisitOperand(operation, 1, type_b);
 
-        shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;");
-        shader.AddLine("flow_stack_top++;");
+        return ApplyPrecise(
+            operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type));
     }
 
-    /*
-     * Emits code to pop an address from the flow address stack, setting the jump address to the
-     * popped address and decrementing the stack top.
-     */
-    void EmitPopFromFlowStack() {
-        const auto scope = shader.Scope();
+    std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
+                                   Type type_a, Type type_b) {
+        const std::string op_a = VisitOperand(operation, 0, type_a);
+        const std::string op_b = VisitOperand(operation, 1, type_b);
 
-        shader.AddLine("flow_stack_top--;");
-        shader.AddLine("jmp_to = flow_stack[flow_stack_top];");
-        shader.AddLine("break;");
+        return ApplyPrecise(operation,
+                            BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type));
     }
 
-    /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
-    void EmitFragmentOutputsWrite() {
-        ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
+    std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
+                                Type type_a, Type type_b, Type type_c) {
+        const std::string op_a = VisitOperand(operation, 0, type_a);
+        const std::string op_b = VisitOperand(operation, 1, type_b);
+        const std::string op_c = VisitOperand(operation, 2, type_c);
 
-        UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented");
-
-        shader.AddLine("if (alpha_test[0] != 0) {");
-        ++shader.scope;
-        // We start on the register containing the alpha value in the first RT.
-        u32 current_reg = 3;
-        for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets;
-             ++render_target) {
-            // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
-            // multiple render targets are used.
-            if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
-                header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
-                header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
-                header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
-                shader.AddLine(fmt::format("if (!AlphaFunc({})) discard;",
-                                           regs.GetRegisterAsFloat(current_reg)));
-                current_reg += 4;
-            }
-        }
-        --shader.scope;
-        shader.AddLine('}');
-
-        // Write the color outputs using the data in the shader registers, disabled
-        // rendertargets/components are skipped in the register assignment.
-        current_reg = 0;
-        for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets;
-             ++render_target) {
-            // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
-            for (u32 component = 0; component < 4; ++component) {
-                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
-                    shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
-                                               regs.GetRegisterAsFloat(current_reg)));
-                    ++current_reg;
-                }
-            }
-        }
-
-        if (header.ps.omap.depth) {
-            // The depth output is always 2 registers after the last color output, and current_reg
-            // already contains one past the last color register.
-
-            shader.AddLine(
-                "gl_FragDepth = " +
-                regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
-                ';');
-        }
+        return ApplyPrecise(
+            operation,
+            BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type));
     }
 
-    /// Unpacks a video instruction operand (e.g. VMAD).
-    std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed,
-                                Tegra::Shader::VideoType type, u64 byte_height) {
-        const std::string value = [&]() {
-            if (!is_chunk) {
-                const auto offset = static_cast<u32>(byte_height * 8);
-                return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
-            }
-            const std::string zero = "0";
-
-            switch (type) {
-            case Tegra::Shader::VideoType::Size16_Low:
-                return '(' + op + " & 0xffff)";
-            case Tegra::Shader::VideoType::Size16_High:
-                return '(' + op + " >> 16)";
-            case Tegra::Shader::VideoType::Size32:
-                // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
-                // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
-                // explanation is found: abort.
-                UNIMPLEMENTED();
-                return zero;
-            case Tegra::Shader::VideoType::Invalid:
-                UNREACHABLE_MSG("Invalid instruction encoding");
-                return zero;
-            default:
-                UNREACHABLE();
-                return zero;
-            }
-        }();
-
-        if (is_signed) {
-            return "int(" + value + ')';
-        }
-        return value;
-    };
-
-    /// Gets the A operand for a video instruction.
-    std::string GetVideoOperandA(Instruction instr) {
-        return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
-                               instr.video.is_byte_chunk_a != 0, instr.video.signed_a,
-                               instr.video.type_a, instr.video.byte_height_a);
-    }
+    std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
+                                   Type type_a, Type type_b, Type type_c, Type type_d) {
+        const std::string op_a = VisitOperand(operation, 0, type_a);
+        const std::string op_b = VisitOperand(operation, 1, type_b);
+        const std::string op_c = VisitOperand(operation, 2, type_c);
+        const std::string op_d = VisitOperand(operation, 3, type_d);
 
-    /// Gets the B operand for a video instruction.
-    std::string GetVideoOperandB(Instruction instr) {
-        if (instr.video.use_register_b) {
-            return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
-                                   instr.video.is_byte_chunk_b != 0, instr.video.signed_b,
-                                   instr.video.type_b, instr.video.byte_height_b);
-        } else {
-            return '(' +
-                   std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
-                                                       : instr.alu.GetImm20_16()) +
-                   ')';
-        }
+        return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " +
+                                                             op_c + ", " + op_d + ')',
+                                                         result_type));
     }
 
-    std::pair<size_t, std::string> ValidateAndGetCoordinateElement(
-        const Tegra::Shader::TextureType texture_type, const bool depth_compare,
-        const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) {
-        const size_t coord_count = TextureCoordinates(texture_type);
-
-        size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
-        const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
-        if (total_coord_count > max_coords || total_reg_count > max_inputs) {
-            UNIMPLEMENTED_MSG("Unsupported Texture operation");
-            total_coord_count = std::min(total_coord_count, max_coords);
-        }
-        // 1D.DC opengl is using a vec3 but 2nd component is ignored later.
-        total_coord_count +=
-            (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D)
-                ? 1
-                : 0;
-
-        constexpr std::array<const char*, 5> coord_container{
-            {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(",
-             "vec4 coord = vec4("}};
-
-        return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]);
-    }
-
-    std::string GetTextureCode(const Tegra::Shader::Instruction& instr,
-                               const Tegra::Shader::TextureType texture_type,
-                               const Tegra::Shader::TextureProcessMode process_mode,
-                               const bool depth_compare, const bool is_array,
-                               const size_t bias_offset) {
-
-        if ((texture_type == Tegra::Shader::TextureType::Texture3D &&
-             (is_array || depth_compare)) ||
-            (texture_type == Tegra::Shader::TextureType::TextureCube && is_array &&
-             depth_compare)) {
-            UNIMPLEMENTED_MSG("This method is not supported.");
-        }
-
-        const std::string sampler =
-            GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-
-        const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ ||
-                                process_mode == Tegra::Shader::TextureProcessMode::LL ||
-                                process_mode == Tegra::Shader::TextureProcessMode::LLA;
-
-        // LOD selection (either via bias or explicit textureLod) not supported in GL for
-        // sampler2DArrayShadow and samplerCubeArrayShadow.
-        const bool gl_lod_supported = !(
-            (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
-            (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
+    std::string GenerateTexture(Operation operation, const std::string& function_suffix,
+                                const std::vector<TextureIR>& extras) {
+        constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
 
-        const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture(";
-        std::string texture = read_method + sampler + ", coord";
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
 
-        UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None &&
-                         !gl_lod_supported);
+        const std::size_t count = operation.GetOperandsCount();
+        const bool has_array = meta->sampler.IsArray();
+        const bool has_shadow = meta->sampler.IsShadow();
 
-        if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) {
-            if (process_mode == Tegra::Shader::TextureProcessMode::LZ) {
-                texture += ", 0.0";
-            } else {
-                // If present, lod or bias are always stored in the register indexed by the
-                // gpr20
-                // field with an offset depending on the usage of the other registers
-                texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset);
-            }
-        }
-        texture += ")";
-        return texture;
-    }
-
-    std::pair<std::string, std::string> GetTEXCode(
-        const Instruction& instr, const Tegra::Shader::TextureType texture_type,
-        const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
-        const bool is_array) {
-        const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
-                                       process_mode != Tegra::Shader::TextureProcessMode::LZ);
-
-        const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
-            texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
-        // If enabled arrays index is always stored in the gpr8 field
-        const u64 array_register = instr.gpr8.Value();
-        // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
-        const u64 coord_register = array_register + (is_array ? 1 : 0);
-
-        std::string coord = coord_dcl;
-        for (size_t i = 0; i < coord_count;) {
-            coord += regs.GetRegisterAsFloat(coord_register + i);
-            ++i;
-            if (i != coord_count) {
-                coord += ',';
-            }
-        }
-        // 1D.DC in opengl the 2nd component is ignored.
-        if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) {
-            coord += ",0.0";
-        }
-        if (is_array) {
-            coord += ',' + regs.GetRegisterAsInteger(array_register);
-        }
-        if (depth_compare) {
-            // Depth is always stored in the register signaled by gpr20
-            // or in the next register if lod or bias are used
-            const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-            coord += ',' + regs.GetRegisterAsFloat(depth_register);
-        }
-        coord += ");";
-        return std::make_pair(
-            coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0));
-    }
-
-    std::pair<std::string, std::string> GetTEXSCode(
-        const Instruction& instr, const Tegra::Shader::TextureType texture_type,
-        const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
-        const bool is_array) {
-        const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
-                                       process_mode != Tegra::Shader::TextureProcessMode::LZ);
-
-        const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
-            texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
-        // If enabled arrays index is always stored in the gpr8 field
-        const u64 array_register = instr.gpr8.Value();
-        // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
-        const u64 coord_register = array_register + (is_array ? 1 : 0);
-        const u64 last_coord_register =
-            (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
-                ? static_cast<u64>(instr.gpr20.Value())
-                : coord_register + 1;
-
-        std::string coord = coord_dcl;
-        for (size_t i = 0; i < coord_count; ++i) {
-            const bool last = (i == (coord_count - 1)) && (coord_count > 1);
-            coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i);
-            if (i < coord_count - 1) {
-                coord += ',';
-            }
+        std::string expr = "texture" + function_suffix;
+        if (!meta->aoffi.empty()) {
+            expr += "Offset";
         }
+        expr += '(' + GetSampler(meta->sampler) + ", ";
+        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
+        expr += '(';
+        for (std::size_t i = 0; i < count; ++i) {
+            expr += Visit(operation[i]);
 
-        if (is_array) {
-            coord += ',' + regs.GetRegisterAsInteger(array_register);
+            const std::size_t next = i + 1;
+            if (next < count)
+                expr += ", ";
         }
-        if (depth_compare) {
-            // Depth is always stored in the register signaled by gpr20
-            // or in the next register if lod or bias are used
-            const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-            coord += ',' + regs.GetRegisterAsFloat(depth_register);
+        if (has_array) {
+            expr += ", float(ftoi(" + Visit(meta->array) + "))";
         }
-        coord += ");";
-
-        return std::make_pair(coord,
-                              GetTextureCode(instr, texture_type, process_mode, depth_compare,
-                                             is_array, (coord_count > 2 ? 1 : 0)));
-    }
-
-    std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr,
-                                                    const Tegra::Shader::TextureType texture_type,
-                                                    const bool depth_compare, const bool is_array) {
-
-        const size_t coord_count = TextureCoordinates(texture_type);
-        const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
-        const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
-
-        constexpr std::array<const char*, 5> coord_container{
-            {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}};
-
-        // If enabled arrays index is always stored in the gpr8 field
-        const u64 array_register = instr.gpr8.Value();
-        // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
-        const u64 coord_register = array_register + (is_array ? 1 : 0);
-
-        std::string coord = coord_container[total_coord_count];
-        for (size_t i = 0; i < coord_count;) {
-            coord += regs.GetRegisterAsFloat(coord_register + i);
-            ++i;
-            if (i != coord_count) {
-                coord += ',';
-            }
-        }
-
-        if (is_array) {
-            coord += ',' + regs.GetRegisterAsInteger(array_register);
-        }
-        coord += ");";
-
-        const std::string sampler =
-            GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-
-        std::string texture = "textureGather(" + sampler + ", coord, ";
-        if (depth_compare) {
-            // Depth is always stored in the register signaled by gpr20
-            texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')';
-        } else {
-            texture += std::to_string(instr.tld4.component) + ')';
+        if (has_shadow) {
+            expr += ", " + Visit(meta->depth_compare);
         }
-        return std::make_pair(coord, texture);
-    }
-
-    std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr,
-                                                    const Tegra::Shader::TextureType texture_type,
-                                                    const bool is_array) {
-
-        const size_t coord_count = TextureCoordinates(texture_type);
-        const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
-        const bool lod_enabled =
-            instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL;
-
-        constexpr std::array<const char*, 4> coord_container{
-            {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}};
-
-        std::string coord = coord_container[total_coord_count];
-
-        // If enabled arrays index is always stored in the gpr8 field
-        const u64 array_register = instr.gpr8.Value();
-
-        // if is array gpr20 is used
-        const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
-
-        const u64 last_coord_register =
-            ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array
-                ? static_cast<u64>(instr.gpr20.Value())
-                : coord_register + 1;
+        expr += ')';
 
-        for (size_t i = 0; i < coord_count; ++i) {
-            const bool last = (i == (coord_count - 1)) && (coord_count > 1);
-            coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i);
-            if (i < coord_count - 1) {
-                coord += ',';
+        for (const auto& variant : extras) {
+            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
+                expr += GenerateTextureArgument(*argument);
+            } else if (std::get_if<TextureAoffi>(&variant)) {
+                expr += GenerateTextureAoffi(meta->aoffi);
+            } else {
+                UNREACHABLE();
             }
         }
-        if (is_array) {
-            coord += ',' + regs.GetRegisterAsInteger(array_register);
-        }
-        coord += ");";
-
-        const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false);
-
-        std::string texture = "texelFetch(" + sampler + ", coords";
-
-        if (lod_enabled) {
-            // When lod is used always is in grp20
-            texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')';
-        } else {
-            texture += ", 0)";
-        }
-        return std::make_pair(coord, texture);
-    }
-
-    /**
-     * Compiles a single instruction from Tegra to GLSL.
-     * @param offset the offset of the Tegra shader instruction.
-     * @return the offset of the next instruction to execute. Usually it is the current offset
-     * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
-     */
-    u32 CompileInstr(u32 offset) {
-        // Ignore sched instructions when generating code.
-        if (IsSchedInstruction(offset)) {
-            return offset + 1;
-        }
-
-        const Instruction instr = {program_code[offset]};
-        const auto opcode = OpCode::Decode(instr);
-
-        // Decoding failure
-        if (!opcode) {
-            UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
-            return offset + 1;
-        }
 
-        shader.AddLine(
-            fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value));
-
-        using Tegra::Shader::Pred;
-        UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
-                             "NeverExecute predicate not implemented");
-
-        // Some instructions (like SSY) don't have a predicate field, they are always
-        // unconditionally executed.
-        bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
+        return expr + ')';
+    }
 
-        if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
-            shader.AddLine("if (" +
-                           GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) +
-                           ')');
-            shader.AddLine('{');
-            ++shader.scope;
+    std::string GenerateTextureArgument(TextureArgument argument) {
+        const auto [type, operand] = argument;
+        if (operand == nullptr) {
+            return {};
         }
 
-        switch (opcode->get().GetType()) {
-        case OpCode::Type::Arithmetic: {
-            std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-
-            std::string op_b;
-
-            if (instr.is_b_imm) {
-                op_b = GetImmediate19(instr);
+        std::string expr = ", ";
+        switch (type) {
+        case Type::Int:
+            if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+                // Inline the string as an immediate integer in GLSL (some extra arguments are
+                // required to be constant)
+                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
             } else {
-                if (instr.is_b_gpr) {
-                    op_b = regs.GetRegisterAsFloat(instr.gpr20);
-                } else {
-                    op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                           GLSLRegister::Type::Float);
-                }
-            }
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::MOV_C:
-            case OpCode::Id::MOV_R: {
-                // MOV does not have neither 'abs' nor 'neg' bits.
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
-                break;
-            }
-
-            case OpCode::Id::FMUL_C:
-            case OpCode::Id::FMUL_R:
-            case OpCode::Id::FMUL_IMM: {
-                // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
-                UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0,
-                                     "FMUL tab5cb8_2({}) is not implemented",
-                                     instr.fmul.tab5cb8_2.Value());
-                UNIMPLEMENTED_IF_MSG(
-                    instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
-                    instr.fmul.tab5c68_0
-                        .Value()); // SMO typical sends 1 here which seems to be the default
-
-                op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
-
-                std::string postfactor_op;
-                if (instr.fmul.postfactor != 0) {
-                    s8 postfactor = static_cast<s8>(instr.fmul.postfactor);
-
-                    // postfactor encoded as 3-bit 1's complement in instruction,
-                    // interpreted with below logic.
-                    if (postfactor >= 4) {
-                        postfactor = 7 - postfactor;
-                    } else {
-                        postfactor = 0 - postfactor;
-                    }
-
-                    if (postfactor > 0) {
-                        postfactor_op = " * " + std::to_string(1 << postfactor);
-                    } else {
-                        postfactor_op = " / " + std::to_string(1 << -postfactor);
-                    }
-                }
-
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1,
-                                        instr.alu.saturate_d, instr.generates_cc, 0, true);
-                break;
-            }
-            case OpCode::Id::FADD_C:
-            case OpCode::Id::FADD_R:
-            case OpCode::Id::FADD_IMM: {
-                op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
-                op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
-                                        instr.alu.saturate_d, instr.generates_cc, 0, true);
-                break;
-            }
-            case OpCode::Id::MUFU: {
-                op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
-                switch (instr.sub_op) {
-                case SubOp::Cos:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                case SubOp::Sin:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                case SubOp::Ex2:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                case SubOp::Lg2:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                case SubOp::Rcp:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                case SubOp::Rsq:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                case SubOp::Sqrt:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, false, 0, true);
-                    break;
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
-                                      static_cast<unsigned>(instr.sub_op.Value()));
-                }
-                break;
-            }
-            case OpCode::Id::FMNMX_C:
-            case OpCode::Id::FMNMX_R:
-            case OpCode::Id::FMNMX_IMM: {
-                UNIMPLEMENTED_IF_MSG(
-                    instr.generates_cc,
-                    "Condition codes generation in FMNMX is partially implemented");
-
-                op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
-                op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
-                std::string condition =
-                    GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
-                std::string parameters = op_a + ',' + op_b;
-                regs.SetRegisterToFloat(instr.gpr0, 0,
-                                        '(' + condition + ") ? min(" + parameters + ") : max(" +
-                                            parameters + ')',
-                                        1, 1, false, instr.generates_cc, 0, true);
-                break;
-            }
-            case OpCode::Id::RRO_C:
-            case OpCode::Id::RRO_R:
-            case OpCode::Id::RRO_IMM: {
-                // Currently RRO is only implemented as a register move.
-                op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
-                LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
-            }
+                expr += "ftoi(" + Visit(operand) + ')';
             }
             break;
-        }
-        case OpCode::Type::ArithmeticImmediate: {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::MOV32_IMM: {
-                regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
-                break;
-            }
-            case OpCode::Id::FMUL32_IMM: {
-                regs.SetRegisterToFloat(
-                    instr.gpr0, 0,
-                    regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1,
-                    instr.fmul32.saturate, instr.op_32.generates_cc, 0, true);
-                break;
-            }
-            case OpCode::Id::FADD32I: {
-                UNIMPLEMENTED_IF_MSG(
-                    instr.op_32.generates_cc,
-                    "Condition codes generation in FADD32I is partially implemented");
-
-                std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                std::string op_b = GetImmediate32(instr);
-
-                if (instr.fadd32i.abs_a) {
-                    op_a = "abs(" + op_a + ')';
-                }
-
-                if (instr.fadd32i.negate_a) {
-                    op_a = "-(" + op_a + ')';
-                }
-
-                if (instr.fadd32i.abs_b) {
-                    op_b = "abs(" + op_b + ')';
-                }
-
-                if (instr.fadd32i.negate_b) {
-                    op_b = "-(" + op_b + ')';
-                }
-
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false,
-                                        instr.op_32.generates_cc, 0, true);
-                break;
-            }
-            }
-            break;
-        }
-        case OpCode::Type::Bfe: {
-            UNIMPLEMENTED_IF(instr.bfe.negate_b);
-
-            std::string op_a = instr.bfe.negate_a ? "-" : "";
-            op_a += regs.GetRegisterAsInteger(instr.gpr8);
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::BFE_IMM: {
-                std::string inner_shift =
-                    '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
-                std::string outer_shift =
-                    '(' + inner_shift + " >> " +
-                    std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
-
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false,
-                                          instr.generates_cc);
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
-            }
-            }
-
+        case Type::Float:
+            expr += Visit(operand);
             break;
-        }
-        case OpCode::Type::Bfi: {
-            const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::BFI_IMM_R:
-                    return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
-                            std::to_string(instr.alu.GetSignedImm20_20())};
-                default:
-                    UNREACHABLE();
-                    return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
-                            std::to_string(instr.alu.GetSignedImm20_20())};
-                }
-            }();
-            const std::string offset = '(' + packed_shift + " & 0xff)";
-            const std::string bits = "((" + packed_shift + " >> 8) & 0xff)";
-            const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
-            regs.SetRegisterToInteger(instr.gpr0, false, 0,
-                                      "bitfieldInsert(" + base + ", " + insert + ", " + offset +
-                                          ", " + bits + ')',
-                                      1, 1, false, instr.generates_cc);
+        default: {
+            const auto type_int = static_cast<u32>(type);
+            UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
+            expr += '0';
             break;
         }
-        case OpCode::Type::Shift: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
-            std::string op_b;
-
-            if (instr.is_b_imm) {
-                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
-            } else {
-                if (instr.is_b_gpr) {
-                    op_b += regs.GetRegisterAsInteger(instr.gpr20);
-                } else {
-                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                            GLSLRegister::Type::Integer);
-                }
-            }
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::SHR_C:
-            case OpCode::Id::SHR_R:
-            case OpCode::Id::SHR_IMM: {
-                if (!instr.shift.is_signed) {
-                    // Logical shift right
-                    op_a = "uint(" + op_a + ')';
-                }
-
-                // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
-                                          1, 1, false, instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::SHL_C:
-            case OpCode::Id::SHL_R:
-            case OpCode::Id::SHL_IMM:
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in SHL is not implemented");
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false,
-                                          instr.generates_cc);
-                break;
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
-            }
-            }
-            break;
         }
-        case OpCode::Type::ArithmeticIntegerImmediate: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
-            std::string op_b = std::to_string(instr.alu.imm20_32.Value());
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::IADD32I:
-                UNIMPLEMENTED_IF_MSG(
-                    instr.op_32.generates_cc,
-                    "Condition codes generation in IADD32I is partially implemented");
-
-                if (instr.iadd32i.negate_a)
-                    op_a = "-(" + op_a + ')';
-
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
-                                          instr.iadd32i.saturate, instr.op_32.generates_cc);
-                break;
-            case OpCode::Id::LOP32I: {
-
-                if (instr.alu.lop32i.invert_a)
-                    op_a = "~(" + op_a + ')';
-
-                if (instr.alu.lop32i.invert_b)
-                    op_b = "~(" + op_b + ')';
+        return expr;
+    }
 
-                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
-                                    Tegra::Shader::PredicateResultMode::None,
-                                    Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc);
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
-                                  opcode->get().GetName());
-            }
-            }
-            break;
+    std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
+        if (aoffi.empty()) {
+            return {};
         }
-        case OpCode::Type::ArithmeticInteger: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
-            std::string op_b;
-            if (instr.is_b_imm) {
-                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
+        constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+        std::string expr = ", ";
+        expr += coord_constructors.at(aoffi.size() - 1);
+        expr += '(';
+
+        for (std::size_t index = 0; index < aoffi.size(); ++index) {
+            const auto operand{aoffi.at(index)};
+            if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+                // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+                // to be constant by the standard).
+                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
             } else {
-                if (instr.is_b_gpr) {
-                    op_b += regs.GetRegisterAsInteger(instr.gpr20);
-                } else {
-                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                            GLSLRegister::Type::Integer);
-                }
-            }
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::IADD_C:
-            case OpCode::Id::IADD_R:
-            case OpCode::Id::IADD_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in IADD is partially implemented");
-
-                if (instr.alu_integer.negate_a)
-                    op_a = "-(" + op_a + ')';
-
-                if (instr.alu_integer.negate_b)
-                    op_b = "-(" + op_b + ')';
-
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
-                                          instr.alu.saturate_d, instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::IADD3_C:
-            case OpCode::Id::IADD3_R:
-            case OpCode::Id::IADD3_IMM: {
-                UNIMPLEMENTED_IF_MSG(
-                    instr.generates_cc,
-                    "Condition codes generation in IADD3 is partially implemented");
-
-                std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
-
-                auto apply_height = [](auto height, auto& oprand) {
-                    switch (height) {
-                    case Tegra::Shader::IAdd3Height::None:
-                        break;
-                    case Tegra::Shader::IAdd3Height::LowerHalfWord:
-                        oprand = "((" + oprand + ") & 0xFFFF)";
-                        break;
-                    case Tegra::Shader::IAdd3Height::UpperHalfWord:
-                        oprand = "((" + oprand + ") >> 16)";
-                        break;
-                    default:
-                        UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}",
-                                          static_cast<u32>(height.Value()));
-                    }
-                };
-
-                if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
-                    apply_height(instr.iadd3.height_a, op_a);
-                    apply_height(instr.iadd3.height_b, op_b);
-                    apply_height(instr.iadd3.height_c, op_c);
-                }
-
-                if (instr.iadd3.neg_a)
-                    op_a = "-(" + op_a + ')';
-
-                if (instr.iadd3.neg_b)
-                    op_b = "-(" + op_b + ')';
-
-                if (instr.iadd3.neg_c)
-                    op_c = "-(" + op_c + ')';
-
-                std::string result;
-                if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
-                    switch (instr.iadd3.mode) {
-                    case Tegra::Shader::IAdd3Mode::RightShift:
-                        // TODO(tech4me): According to
-                        // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
-                        // The addition between op_a and op_b should be done in uint33, more
-                        // investigation required
-                        result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')';
-                        break;
-                    case Tegra::Shader::IAdd3Mode::LeftShift:
-                        result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')';
-                        break;
-                    default:
-                        result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
-                        break;
-                    }
-                } else {
-                    result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
-                }
-
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false,
-                                          instr.generates_cc);
-                break;
+                expr += "ftoi(" + Visit(operand) + ')';
             }
-            case OpCode::Id::ISCADD_C:
-            case OpCode::Id::ISCADD_R:
-            case OpCode::Id::ISCADD_IMM: {
-                UNIMPLEMENTED_IF_MSG(
-                    instr.generates_cc,
-                    "Condition codes generation in ISCADD is partially implemented");
-
-                if (instr.alu_integer.negate_a)
-                    op_a = "-(" + op_a + ')';
-
-                if (instr.alu_integer.negate_b)
-                    op_b = "-(" + op_b + ')';
-
-                const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
-
-                regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                          "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1,
-                                          false, instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::POPC_C:
-            case OpCode::Id::POPC_R:
-            case OpCode::Id::POPC_IMM: {
-                if (instr.popc.invert) {
-                    op_b = "~(" + op_b + ')';
-                }
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1);
-                break;
+            if (index + 1 < aoffi.size()) {
+                expr += ", ";
             }
-            case OpCode::Id::SEL_C:
-            case OpCode::Id::SEL_R:
-            case OpCode::Id::SEL_IMM: {
-                const std::string condition =
-                    GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
-                regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                          '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
-                break;
-            }
-            case OpCode::Id::LOP_C:
-            case OpCode::Id::LOP_R:
-            case OpCode::Id::LOP_IMM: {
-
-                if (instr.alu.lop.invert_a)
-                    op_a = "~(" + op_a + ')';
-
-                if (instr.alu.lop.invert_b)
-                    op_b = "~(" + op_b + ')';
-
-                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
-                                    instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
-                                    instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::LOP3_C:
-            case OpCode::Id::LOP3_R:
-            case OpCode::Id::LOP3_IMM: {
-                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
-                std::string lut;
-
-                if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
-                    lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
-                } else {
-                    lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
-                }
-
-                WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::IMNMX_C:
-            case OpCode::Id::IMNMX_R:
-            case OpCode::Id::IMNMX_IMM: {
-                UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
-                UNIMPLEMENTED_IF_MSG(
-                    instr.generates_cc,
-                    "Condition codes generation in IMNMX is partially implemented");
-
-                const std::string condition =
-                    GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
-                const std::string parameters = op_a + ',' + op_b;
-                regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
-                                          '(' + condition + ") ? min(" + parameters + ") : max(" +
-                                              parameters + ')',
-                                          1, 1, false, instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::LEA_R2:
-            case OpCode::Id::LEA_R1:
-            case OpCode::Id::LEA_IMM:
-            case OpCode::Id::LEA_RZ:
-            case OpCode::Id::LEA_HI: {
-                std::string op_c;
-
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::LEA_R2: {
-                    op_a = regs.GetRegisterAsInteger(instr.gpr20);
-                    op_b = regs.GetRegisterAsInteger(instr.gpr39);
-                    op_c = std::to_string(instr.lea.r2.entry_a);
-                    break;
-                }
-
-                case OpCode::Id::LEA_R1: {
-                    const bool neg = instr.lea.r1.neg != 0;
-                    op_a = regs.GetRegisterAsInteger(instr.gpr8);
-                    if (neg)
-                        op_a = "-(" + op_a + ')';
-                    op_b = regs.GetRegisterAsInteger(instr.gpr20);
-                    op_c = std::to_string(instr.lea.r1.entry_a);
-                    break;
-                }
-
-                case OpCode::Id::LEA_IMM: {
-                    const bool neg = instr.lea.imm.neg != 0;
-                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
-                    if (neg)
-                        op_b = "-(" + op_b + ')';
-                    op_a = std::to_string(instr.lea.imm.entry_a);
-                    op_c = std::to_string(instr.lea.imm.entry_b);
-                    break;
-                }
-
-                case OpCode::Id::LEA_RZ: {
-                    const bool neg = instr.lea.rz.neg != 0;
-                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
-                    if (neg)
-                        op_b = "-(" + op_b + ')';
-                    op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
-                                           GLSLRegister::Type::Integer);
-                    op_c = std::to_string(instr.lea.rz.entry_a);
-
-                    break;
-                }
-
-                case OpCode::Id::LEA_HI:
-                default: {
-                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
-                    op_a = std::to_string(instr.lea.imm.entry_a);
-                    op_c = std::to_string(instr.lea.imm.entry_b);
-                    UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
-                }
-                }
-                UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
-                                     "Unhandled LEA Predicate");
-                const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false,
-                                          instr.generates_cc);
-
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}",
-                                  opcode->get().GetName());
-            }
-            }
-
-            break;
         }
-        case OpCode::Type::ArithmeticHalf: {
-            if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
-                opcode->get().GetId() == OpCode::Id::HADD2_R) {
-                UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
-            }
-            const bool negate_a =
-                opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
-            const bool negate_b =
-                opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
-
-            const std::string op_a =
-                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a,
-                             instr.alu_half.abs_a != 0, negate_a);
-
-            std::string op_b;
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::HADD2_C:
-            case OpCode::Id::HMUL2_C:
-                op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                       GLSLRegister::Type::UnsignedInteger);
-                break;
-            case OpCode::Id::HADD2_R:
-            case OpCode::Id::HMUL2_R:
-                op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false);
-                break;
-            default:
-                UNREACHABLE();
-                op_b = "0";
-                break;
-            }
-            op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b);
-
-            const std::string result = [&]() {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::HADD2_C:
-                case OpCode::Id::HADD2_R:
-                    return '(' + op_a + " + " + op_b + ')';
-                case OpCode::Id::HMUL2_C:
-                case OpCode::Id::HMUL2_R:
-                    return '(' + op_a + " * " + op_b + ')';
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled half float instruction: {}",
-                                      opcode->get().GetName());
-                    return std::string("0");
-                }
-            }();
+        expr += ')';
 
-            regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1,
-                                        instr.alu_half.saturate != 0);
-            break;
-        }
-        case OpCode::Type::ArithmeticHalfImmediate: {
-            if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
-                UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
-            } else {
-                UNIMPLEMENTED_IF(instr.alu_half_imm.precision !=
-                                 Tegra::Shader::HalfPrecision::None);
-            }
+        return expr;
+    }
 
-            const std::string op_a = GetHalfFloat(
-                regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a,
-                instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0);
+    std::string Assign(Operation operation) {
+        const Node dest = operation[0];
+        const Node src = operation[1];
 
-            const std::string op_b = UnpackHalfImmediate(instr, true);
+        std::string target;
+        if (const auto gpr = std::get_if<GprNode>(dest)) {
+            if (gpr->GetIndex() == Register::ZeroIndex) {
+                // Writing to Register::ZeroIndex is a no op
+                return {};
+            }
+            target = GetRegister(gpr->GetIndex());
 
-            const std::string result = [&]() {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::HADD2_IMM:
-                    return op_a + " + " + op_b;
-                case OpCode::Id::HMUL2_IMM:
-                    return op_a + " * " + op_b;
+        } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+            target = [&]() -> std::string {
+                switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
+                case Attribute::Index::Position:
+                    return "position" + GetSwizzle(abuf->GetElement());
+                case Attribute::Index::PointSize:
+                    return "gl_PointSize";
+                case Attribute::Index::ClipDistances0123:
+                    return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']';
+                case Attribute::Index::ClipDistances4567:
+                    return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']';
                 default:
-                    UNREACHABLE();
-                    return std::string("0");
+                    if (attribute >= Attribute::Index::Attribute_0 &&
+                        attribute <= Attribute::Index::Attribute_31) {
+                        return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
+                    }
+                    UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
+                                      static_cast<u32>(attribute));
+                    return "0";
                 }
             }();
 
-            regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1,
-                                        instr.alu_half_imm.saturate != 0);
-            break;
-        }
-        case OpCode::Type::Ffma: {
-            const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-            std::string op_b = instr.ffma.negate_b ? "-" : "";
-            std::string op_c = instr.ffma.negate_c ? "-" : "";
-
-            UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
-            UNIMPLEMENTED_IF_MSG(
-                instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
-                instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
-            UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
-                                 instr.ffma.tab5980_1.Value());
-            UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                 "Condition codes generation in FFMA is partially implemented");
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::FFMA_CR: {
-                op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                        GLSLRegister::Type::Float);
-                op_c += regs.GetRegisterAsFloat(instr.gpr39);
-                break;
-            }
-            case OpCode::Id::FFMA_RR: {
-                op_b += regs.GetRegisterAsFloat(instr.gpr20);
-                op_c += regs.GetRegisterAsFloat(instr.gpr39);
-                break;
-            }
-            case OpCode::Id::FFMA_RC: {
-                op_b += regs.GetRegisterAsFloat(instr.gpr39);
-                op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                        GLSLRegister::Type::Float);
-                break;
-            }
-            case OpCode::Id::FFMA_IMM: {
-                op_b += GetImmediate19(instr);
-                op_c += regs.GetRegisterAsFloat(instr.gpr39);
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
-            }
-            }
+        } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+            target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
 
-            regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
-                                    1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true);
-            break;
+        } else {
+            UNREACHABLE_MSG("Assign called without a proper target");
         }
-        case OpCode::Type::Hfma2: {
-            if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
-                UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None);
-            } else {
-                UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None);
-            }
-            const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR
-                                      ? instr.hfma2.rr.saturate != 0
-                                      : instr.hfma2.saturate != 0;
-
-            const std::string op_a =
-                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a);
-            std::string op_b, op_c;
-
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::HFMA2_CR:
-                op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                                    GLSLRegister::Type::UnsignedInteger),
-                                    instr.hfma2.type_b, false, instr.hfma2.negate_b);
-                op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
-                                    instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
-                break;
-            case OpCode::Id::HFMA2_RC:
-                op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
-                                    instr.hfma2.type_reg39, false, instr.hfma2.negate_b);
-                op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                                    GLSLRegister::Type::UnsignedInteger),
-                                    instr.hfma2.type_b, false, instr.hfma2.negate_c);
-                break;
-            case OpCode::Id::HFMA2_RR:
-                op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
-                                    instr.hfma2.type_b, false, instr.hfma2.negate_b);
-                op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
-                                    instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c);
-                break;
-            case OpCode::Id::HFMA2_IMM_R:
-                op_b = UnpackHalfImmediate(instr, true);
-                op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
-                                    instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
-                break;
-            default:
-                UNREACHABLE();
-                op_c = op_b = "vec2(0)";
-                break;
-            }
 
-            const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+        code.AddLine(target + " = " + Visit(src) + ';');
+        return {};
+    }
 
-            regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate);
-            break;
+    std::string Composite(Operation operation) {
+        std::string value = "vec4(";
+        for (std::size_t i = 0; i < 4; ++i) {
+            value += Visit(operation[i]);
+            if (i < 3)
+                value += ", ";
         }
-        case OpCode::Type::Conversion: {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::I2I_R: {
-                UNIMPLEMENTED_IF(instr.conversion.selector);
-
-                std::string op_a = regs.GetRegisterAsInteger(
-                    instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
+        value += ')';
+        return value;
+    }
 
-                if (instr.conversion.abs_a) {
-                    op_a = "abs(" + op_a + ')';
-                }
+    template <Type type>
+    std::string Add(Operation operation) {
+        return GenerateBinaryInfix(operation, "+", type, type, type);
+    }
 
-                if (instr.conversion.negate_a) {
-                    op_a = "-(" + op_a + ')';
-                }
+    template <Type type>
+    std::string Mul(Operation operation) {
+        return GenerateBinaryInfix(operation, "*", type, type, type);
+    }
 
-                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1, instr.alu.saturate_d, instr.generates_cc, 0,
-                                          instr.conversion.dest_size);
-                break;
-            }
-            case OpCode::Id::I2F_R:
-            case OpCode::Id::I2F_C: {
-                UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
-                UNIMPLEMENTED_IF(instr.conversion.selector);
-                std::string op_a;
-
-                if (instr.is_b_gpr) {
-                    op_a =
-                        regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed,
-                                                  instr.conversion.src_size);
-                } else {
-                    op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                           instr.conversion.is_input_signed
-                                               ? GLSLRegister::Type::Integer
-                                               : GLSLRegister::Type::UnsignedInteger,
-                                           instr.conversion.src_size);
-                }
+    template <Type type>
+    std::string Div(Operation operation) {
+        return GenerateBinaryInfix(operation, "/", type, type, type);
+    }
 
-                if (instr.conversion.abs_a) {
-                    op_a = "abs(" + op_a + ')';
-                }
+    template <Type type>
+    std::string Fma(Operation operation) {
+        return GenerateTernary(operation, "fma", type, type, type, type);
+    }
 
-                if (instr.conversion.negate_a) {
-                    op_a = "-(" + op_a + ')';
-                }
+    template <Type type>
+    std::string Negate(Operation operation) {
+        return GenerateUnary(operation, "-", type, type, true);
+    }
 
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::F2F_R: {
-                UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
-                UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
-                std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+    template <Type type>
+    std::string Absolute(Operation operation) {
+        return GenerateUnary(operation, "abs", type, type, false);
+    }
 
-                if (instr.conversion.abs_a) {
-                    op_a = "abs(" + op_a + ')';
-                }
+    std::string FClamp(Operation operation) {
+        return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
+                               Type::Float);
+    }
 
-                if (instr.conversion.negate_a) {
-                    op_a = "-(" + op_a + ')';
-                }
+    template <Type type>
+    std::string Min(Operation operation) {
+        return GenerateBinaryCall(operation, "min", type, type, type);
+    }
 
-                switch (instr.conversion.f2f.rounding) {
-                case Tegra::Shader::F2fRoundingOp::None:
-                    break;
-                case Tegra::Shader::F2fRoundingOp::Round:
-                    op_a = "roundEven(" + op_a + ')';
-                    break;
-                case Tegra::Shader::F2fRoundingOp::Floor:
-                    op_a = "floor(" + op_a + ')';
-                    break;
-                case Tegra::Shader::F2fRoundingOp::Ceil:
-                    op_a = "ceil(" + op_a + ')';
-                    break;
-                case Tegra::Shader::F2fRoundingOp::Trunc:
-                    op_a = "trunc(" + op_a + ')';
-                    break;
-                default:
-                    UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
-                                      static_cast<u32>(instr.conversion.f2f.rounding.Value()));
-                    break;
-                }
+    template <Type type>
+    std::string Max(Operation operation) {
+        return GenerateBinaryCall(operation, "max", type, type, type);
+    }
 
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d,
-                                        instr.generates_cc);
-                break;
-            }
-            case OpCode::Id::F2I_R:
-            case OpCode::Id::F2I_C: {
-                UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
-                std::string op_a{};
+    std::string Select(Operation operation) {
+        const std::string condition = Visit(operation[0]);
+        const std::string true_case = Visit(operation[1]);
+        const std::string false_case = Visit(operation[2]);
+        return ApplyPrecise(operation,
+                            '(' + condition + " ? " + true_case + " : " + false_case + ')');
+    }
 
-                if (instr.is_b_gpr) {
-                    op_a = regs.GetRegisterAsFloat(instr.gpr20);
-                } else {
-                    op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                           GLSLRegister::Type::Float);
-                }
+    std::string FCos(Operation operation) {
+        return GenerateUnary(operation, "cos", Type::Float, Type::Float, false);
+    }
 
-                if (instr.conversion.abs_a) {
-                    op_a = "abs(" + op_a + ')';
-                }
+    std::string FSin(Operation operation) {
+        return GenerateUnary(operation, "sin", Type::Float, Type::Float, false);
+    }
 
-                if (instr.conversion.negate_a) {
-                    op_a = "-(" + op_a + ')';
-                }
+    std::string FExp2(Operation operation) {
+        return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false);
+    }
 
-                switch (instr.conversion.f2i.rounding) {
-                case Tegra::Shader::F2iRoundingOp::None:
-                    break;
-                case Tegra::Shader::F2iRoundingOp::Floor:
-                    op_a = "floor(" + op_a + ')';
-                    break;
-                case Tegra::Shader::F2iRoundingOp::Ceil:
-                    op_a = "ceil(" + op_a + ')';
-                    break;
-                case Tegra::Shader::F2iRoundingOp::Trunc:
-                    op_a = "trunc(" + op_a + ')';
-                    break;
-                default:
-                    UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
-                                      static_cast<u32>(instr.conversion.f2i.rounding.Value()));
-                    break;
-                }
+    std::string FLog2(Operation operation) {
+        return GenerateUnary(operation, "log2", Type::Float, Type::Float, false);
+    }
 
-                if (instr.conversion.is_output_signed) {
-                    op_a = "int(" + op_a + ')';
-                } else {
-                    op_a = "uint(" + op_a + ')';
-                }
+    std::string FInverseSqrt(Operation operation) {
+        return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false);
+    }
 
-                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1, false, instr.generates_cc, 0,
-                                          instr.conversion.dest_size);
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
-            }
-            }
-            break;
-        }
-        case OpCode::Type::Memory: {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::LD_A: {
-                // Note: Shouldn't this be interp mode flat? As in no interpolation made.
-                UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
-                                     "Indirect attribute loads are not supported");
-                UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
-                                     "Unaligned attribute loads are not supported");
-
-                Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
-                                                  Tegra::Shader::IpaSampleMode::Default};
-
-                u64 next_element = instr.attribute.fmt20.element;
-                u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
-                const auto LoadNextElement = [&](u32 reg_offset) {
-                    regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
-                                                    static_cast<Attribute::Index>(next_index),
-                                                    input_mode, instr.gpr39.Value());
-
-                    // Load the next attribute element into the following register. If the element
-                    // to load goes beyond the vec4 size, load the first element of the next
-                    // attribute.
-                    next_element = (next_element + 1) % 4;
-                    next_index = next_index + (next_element == 0 ? 1 : 0);
-                };
-
-                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-                    LoadNextElement(reg_offset);
-                }
-                break;
-            }
-            case OpCode::Id::LD_C: {
-                UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
-
-                const auto scope = shader.Scope();
-
-                shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
-                               " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
-
-                const std::string op_a =
-                    regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
-                                            GLSLRegister::Type::Float);
-
-                switch (instr.ld_c.type.Value()) {
-                case Tegra::Shader::UniformType::Single:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
-                    break;
-
-                case Tegra::Shader::UniformType::Double: {
-                    const std::string op_b =
-                        regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
-                                                "index", GLSLRegister::Type::Float);
-                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
-                    regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
-                    break;
-                }
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled type: {}",
-                                      static_cast<unsigned>(instr.ld_c.type.Value()));
-                }
-                break;
-            }
-            case OpCode::Id::LD_L: {
-                UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
-                                     static_cast<unsigned>(instr.ld_l.unknown.Value()));
+    std::string FSqrt(Operation operation) {
+        return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false);
+    }
 
-                const auto scope = shader.Scope();
+    std::string FRoundEven(Operation operation) {
+        return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false);
+    }
 
-                std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
-                                 std::to_string(instr.smem_imm.Value()) + ')';
+    std::string FFloor(Operation operation) {
+        return GenerateUnary(operation, "floor", Type::Float, Type::Float, false);
+    }
 
-                shader.AddLine("uint index = (" + op + " / 4);");
+    std::string FCeil(Operation operation) {
+        return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false);
+    }
 
-                const std::string op_a = regs.GetLocalMemoryAsFloat("index");
+    std::string FTrunc(Operation operation) {
+        return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false);
+    }
 
-                switch (instr.ldst_sl.type.Value()) {
-                case Tegra::Shader::StoreType::Bytes32:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
-                    break;
-                default:
-                    UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
-                                      static_cast<unsigned>(instr.ldst_sl.type.Value()));
-                }
-                break;
-            }
-            case OpCode::Id::ST_A: {
-                UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
-                                     "Indirect attribute loads are not supported");
-                UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
-                                     "Unaligned attribute loads are not supported");
-
-                u64 next_element = instr.attribute.fmt20.element;
-                u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
-                const auto StoreNextElement = [&](u32 reg_offset) {
-                    regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
-                                                      next_element, instr.gpr0.Value() + reg_offset,
-                                                      instr.gpr39.Value());
-
-                    // Load the next attribute element into the following register. If the element
-                    // to load goes beyond the vec4 size, load the first element of the next
-                    // attribute.
-                    next_element = (next_element + 1) % 4;
-                    next_index = next_index + (next_element == 0 ? 1 : 0);
-                };
-
-                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-                    StoreNextElement(reg_offset);
-                }
+    template <Type type>
+    std::string FCastInteger(Operation operation) {
+        return GenerateUnary(operation, "float", Type::Float, type, false);
+    }
 
-                break;
-            }
-            case OpCode::Id::ST_L: {
-                UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
-                                     static_cast<unsigned>(instr.st_l.unknown.Value()));
+    std::string ICastFloat(Operation operation) {
+        return GenerateUnary(operation, "int", Type::Int, Type::Float, false);
+    }
 
-                const auto scope = shader.Scope();
+    std::string ICastUnsigned(Operation operation) {
+        return GenerateUnary(operation, "int", Type::Int, Type::Uint, false);
+    }
 
-                std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
-                                 std::to_string(instr.smem_imm.Value()) + ')';
+    template <Type type>
+    std::string LogicalShiftLeft(Operation operation) {
+        return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
+    }
 
-                shader.AddLine("uint index = (" + op + " / 4);");
+    std::string ILogicalShiftRight(Operation operation) {
+        const std::string op_a = VisitOperand(operation, 0, Type::Uint);
+        const std::string op_b = VisitOperand(operation, 1, Type::Uint);
 
-                switch (instr.ldst_sl.type.Value()) {
-                case Tegra::Shader::StoreType::Bytes32:
-                    regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
-                    break;
-                default:
-                    UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
-                                      static_cast<unsigned>(instr.ldst_sl.type.Value()));
-                }
-                break;
-            }
-            case OpCode::Id::TEX: {
-                Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
-                const bool is_array = instr.tex.array != 0;
-                const bool depth_compare =
-                    instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                const auto process_mode = instr.tex.GetTextureProcessMode();
-                UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                                     "NODEP is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
-                                     "AOFFI is not implemented");
-
-                const auto [coord, texture] =
-                    GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array);
-
-                const auto scope = shader.Scope();
-                shader.AddLine(coord);
-
-                if (depth_compare) {
-                    regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1);
-                } else {
-                    shader.AddLine("vec4 texture_tmp = " + texture + ';');
-                    std::size_t dest_elem{};
-                    for (std::size_t elem = 0; elem < 4; ++elem) {
-                        if (!instr.tex.IsComponentEnabled(elem)) {
-                            // Skip disabled components
-                            continue;
-                        }
-                        regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
-                                                dest_elem);
-                        ++dest_elem;
-                    }
-                }
-                break;
-            }
-            case OpCode::Id::TEXS: {
-                Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
-                const bool is_array{instr.texs.IsArrayTexture()};
-                const bool depth_compare =
-                    instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                const auto process_mode = instr.texs.GetTextureProcessMode();
+        return ApplyPrecise(operation,
+                            BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int));
+    }
 
-                UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                                     "NODEP is not implemented");
+    std::string IArithmeticShiftRight(Operation operation) {
+        return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
+    }
 
-                const auto scope = shader.Scope();
+    template <Type type>
+    std::string BitwiseAnd(Operation operation) {
+        return GenerateBinaryInfix(operation, "&", type, type, type);
+    }
 
-                auto [coord, texture] =
-                    GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array);
+    template <Type type>
+    std::string BitwiseOr(Operation operation) {
+        return GenerateBinaryInfix(operation, "|", type, type, type);
+    }
 
-                shader.AddLine(coord);
+    template <Type type>
+    std::string BitwiseXor(Operation operation) {
+        return GenerateBinaryInfix(operation, "^", type, type, type);
+    }
 
-                if (depth_compare) {
-                    texture = "vec4(" + texture + ')';
-                }
-                shader.AddLine("vec4 texture_tmp = " + texture + ';');
+    template <Type type>
+    std::string BitwiseNot(Operation operation) {
+        return GenerateUnary(operation, "~", type, type, false);
+    }
 
-                if (instr.texs.fp32_flag) {
-                    WriteTexsInstructionFloat(instr, "texture_tmp");
-                } else {
-                    WriteTexsInstructionHalfFloat(instr, "texture_tmp");
-                }
-                break;
-            }
-            case OpCode::Id::TLDS: {
-                const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
-                const bool is_array{instr.tlds.IsArrayTexture()};
+    std::string UCastFloat(Operation operation) {
+        return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false);
+    }
 
-                UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                                     "NODEP is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
-                                     "AOFFI is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
-                                     "MZ is not implemented");
+    std::string UCastSigned(Operation operation) {
+        return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false);
+    }
 
-                const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array);
+    std::string UShiftRight(Operation operation) {
+        return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
+    }
 
-                const auto scope = shader.Scope();
+    template <Type type>
+    std::string BitfieldInsert(Operation operation) {
+        return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
+                                  Type::Int);
+    }
 
-                shader.AddLine(coord);
-                shader.AddLine("vec4 texture_tmp = " + texture + ';');
-                WriteTexsInstructionFloat(instr, "texture_tmp");
-                break;
-            }
-            case OpCode::Id::TLD4: {
-
-                UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                                     "NODEP is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
-                                     "AOFFI is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
-                                     "NDV is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
-                                     "PTP is not implemented");
-
-                auto texture_type = instr.tld4.texture_type.Value();
-                const bool depth_compare =
-                    instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                const bool is_array = instr.tld4.array != 0;
-
-                const auto [coord, texture] =
-                    GetTLD4Code(instr, texture_type, depth_compare, is_array);
-
-                const auto scope = shader.Scope();
-
-                shader.AddLine(coord);
-                std::size_t dest_elem{};
-
-                shader.AddLine("vec4 texture_tmp = " + texture + ';');
-                for (std::size_t elem = 0; elem < 4; ++elem) {
-                    if (!instr.tex.IsComponentEnabled(elem)) {
-                        // Skip disabled components
-                        continue;
-                    }
-                    regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
-                                            dest_elem);
-                    ++dest_elem;
-                }
-                break;
-            }
-            case OpCode::Id::TLD4S: {
-                UNIMPLEMENTED_IF_MSG(
-                    instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                    "NODEP is not implemented");
-                UNIMPLEMENTED_IF_MSG(
-                    instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
-                    "AOFFI is not implemented");
+    template <Type type>
+    std::string BitfieldExtract(Operation operation) {
+        return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
+    }
 
-                const auto scope = shader.Scope();
+    template <Type type>
+    std::string BitCount(Operation operation) {
+        return GenerateUnary(operation, "bitCount", type, type, false);
+    }
 
-                std::string coords;
+    std::string HNegate(Operation operation) {
+        const auto GetNegate = [&](std::size_t index) -> std::string {
+            return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
+        };
+        const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" +
+                                  GetNegate(1) + ", " + GetNegate(2) + "))";
+        return BitwiseCastResult(value, Type::HalfFloat);
+    }
 
-                const bool depth_compare =
-                    instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+    std::string HMergeF32(Operation operation) {
+        return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
+    }
 
-                const std::string sampler = GetSampler(
-                    instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
+    std::string HMergeH0(Operation operation) {
+        return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" +
+               Visit(operation[1]) + ")[0]))";
+    }
 
-                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                coords = "vec2 coords = vec2(" + op_a + ", ";
-                std::string texture = "textureGather(" + sampler + ", coords, ";
+    std::string HMergeH1(Operation operation) {
+        return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" +
+               Visit(operation[1]) + ")[1]))";
+    }
 
-                if (!depth_compare) {
-                    const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
-                    coords += op_b + ");";
-                    texture += std::to_string(instr.tld4s.component) + ')';
-                } else {
-                    const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
-                    coords += op_b + ");";
-                    texture += op_c + ')';
-                }
-                shader.AddLine(coords);
-                shader.AddLine("vec4 texture_tmp = " + texture + ';');
-                WriteTexsInstructionFloat(instr, "texture_tmp");
-                break;
-            }
-            case OpCode::Id::TXQ: {
-                UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                                     "NODEP is not implemented");
-
-                const auto scope = shader.Scope();
-
-                // TODO: The new commits on the texture refactor, change the way samplers work.
-                // Sadly, not all texture instructions specify the type of texture their sampler
-                // uses. This must be fixed at a later instance.
-                const std::string sampler =
-                    GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
-                switch (instr.txq.query_type) {
-                case Tegra::Shader::TextureQueryType::Dimension: {
-                    const std::string texture = "textureSize(" + sampler + ", " +
-                                                regs.GetRegisterAsInteger(instr.gpr8) + ')';
-                    const std::string mip_level = "textureQueryLevels(" + sampler + ')';
-                    shader.AddLine("ivec2 sizes = " + texture + ';');
-
-                    regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1);
-                    regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
-                    regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
-                    regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
-                    break;
-                }
-                default: {
-                    UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
-                                      static_cast<u32>(instr.txq.query_type.Value()));
-                }
-                }
-                break;
-            }
-            case OpCode::Id::TMML: {
-                UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
-                                     "NODEP is not implemented");
-                UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
-                                     "NDV is not implemented");
-
-                const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                const bool is_array = instr.tmml.array != 0;
-                auto texture_type = instr.tmml.texture_type.Value();
-                const std::string sampler =
-                    GetSampler(instr.sampler, texture_type, is_array, false);
-
-                const auto scope = shader.Scope();
-
-                // TODO: Add coordinates for different samplers once other texture types are
-                // implemented.
-                switch (texture_type) {
-                case Tegra::Shader::TextureType::Texture1D: {
-                    shader.AddLine("float coords = " + x + ';');
-                    break;
-                }
-                case Tegra::Shader::TextureType::Texture2D: {
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
-                    break;
-                }
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
+    std::string HPack2(Operation operation) {
+        return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))";
+    }
 
-                    // Fallback to interpreting as a 2D texture for now
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
-                    texture_type = Tegra::Shader::TextureType::Texture2D;
-                }
+    template <Type type>
+    std::string LogicalLessThan(Operation operation) {
+        return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
+    }
 
-                const std::string texture = "textureQueryLod(" + sampler + ", coords)";
-                shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);");
+    template <Type type>
+    std::string LogicalEqual(Operation operation) {
+        return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
+    }
 
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
-                regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
-            }
-            }
-            break;
-        }
-        case OpCode::Type::FloatSetPredicate: {
-            const std::string op_a =
-                GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), instr.fsetp.abs_a != 0,
-                                 instr.fsetp.neg_a != 0);
+    template <Type type>
+    std::string LogicalLessEqual(Operation operation) {
+        return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
+    }
 
-            std::string op_b;
+    template <Type type>
+    std::string LogicalGreaterThan(Operation operation) {
+        return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
+    }
 
-            if (instr.is_b_imm) {
-                op_b += '(' + GetImmediate19(instr) + ')';
-            } else {
-                if (instr.is_b_gpr) {
-                    op_b += regs.GetRegisterAsFloat(instr.gpr20);
-                } else {
-                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                            GLSLRegister::Type::Float);
-                }
-            }
+    template <Type type>
+    std::string LogicalNotEqual(Operation operation) {
+        return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
+    }
 
-            if (instr.fsetp.abs_b) {
-                op_b = "abs(" + op_b + ')';
-            }
+    template <Type type>
+    std::string LogicalGreaterEqual(Operation operation) {
+        return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
+    }
 
-            // We can't use the constant predicate as destination.
-            ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+    std::string LogicalFIsNan(Operation operation) {
+        return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false);
+    }
 
-            const std::string second_pred =
-                GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
+    std::string LogicalAssign(Operation operation) {
+        const Node dest = operation[0];
+        const Node src = operation[1];
 
-            const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
+        std::string target;
 
-            const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
-            // Set the primary predicate to the result of Predicate OP SecondPredicate
-            SetPredicate(instr.fsetp.pred3,
-                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+        if (const auto pred = std::get_if<PredicateNode>(dest)) {
+            ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
 
-            if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-                // if enabled
-                SetPredicate(instr.fsetp.pred0,
-                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+            case Tegra::Shader::Pred::UnusedIndex:
+                // Writing to these predicates is a no-op
+                return {};
             }
-            break;
+            target = GetPredicate(index);
+        } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+            target = GetInternalFlag(flag->GetFlag());
         }
-        case OpCode::Type::IntegerSetPredicate: {
-            const std::string op_a =
-                regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
-            std::string op_b;
-
-            if (instr.is_b_imm) {
-                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
-            } else {
-                if (instr.is_b_gpr) {
-                    op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
-                } else {
-                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                            GLSLRegister::Type::Integer);
-                }
-            }
 
-            // We can't use the constant predicate as destination.
-            ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+        code.AddLine(target + " = " + Visit(src) + ';');
+        return {};
+    }
 
-            const std::string second_pred =
-                GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
+    std::string LogicalAnd(Operation operation) {
+        return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
+    }
 
-            const std::string combiner = GetPredicateCombiner(instr.isetp.op);
+    std::string LogicalOr(Operation operation) {
+        return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
+    }
 
-            const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
-            // Set the primary predicate to the result of Predicate OP SecondPredicate
-            SetPredicate(instr.isetp.pred3,
-                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+    std::string LogicalXor(Operation operation) {
+        return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
+    }
 
-            if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-                // if enabled
-                SetPredicate(instr.isetp.pred0,
-                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
-            }
-            break;
-        }
-        case OpCode::Type::HalfSetPredicate: {
-            UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
-
-            const std::string op_a =
-                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a,
-                             instr.hsetp2.abs_a, instr.hsetp2.negate_a);
-
-            const std::string op_b = [&]() {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::HSETP2_R:
-                    return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
-                                        instr.hsetp2.type_b, instr.hsetp2.abs_a,
-                                        instr.hsetp2.negate_b);
-                default:
-                    UNREACHABLE();
-                    return std::string("vec2(0)");
-                }
-            }();
+    std::string LogicalNegate(Operation operation) {
+        return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false);
+    }
 
-            // We can't use the constant predicate as destination.
-            ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
+    std::string LogicalPick2(Operation operation) {
+        const std::string pair = VisitOperand(operation, 0, Type::Bool2);
+        return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']';
+    }
 
-            const std::string second_pred =
-                GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
+    std::string LogicalAll2(Operation operation) {
+        return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
+    }
 
-            const std::string combiner = GetPredicateCombiner(instr.hsetp2.op);
+    std::string LogicalAny2(Operation operation) {
+        return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
+    }
 
-            const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||";
-            const std::string predicate =
-                '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' +
-                component_combiner + ' ' +
-                GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')';
+    std::string Logical2HLessThan(Operation operation) {
+        return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat,
+                                  Type::HalfFloat);
+    }
 
-            // Set the primary predicate to the result of Predicate OP SecondPredicate
-            SetPredicate(instr.hsetp2.pred3,
-                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+    std::string Logical2HEqual(Operation operation) {
+        return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat,
+                                  Type::HalfFloat);
+    }
 
-            if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-                // if enabled
-                SetPredicate(instr.hsetp2.pred0,
-                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
-            }
-            break;
-        }
-        case OpCode::Type::PredicateSetRegister: {
-            UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                 "Condition codes generation in PSET is partially implemented");
-
-            const std::string op_a =
-                GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
-            const std::string op_b =
-                GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
-
-            const std::string second_pred =
-                GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
-
-            const std::string combiner = GetPredicateCombiner(instr.pset.op);
-
-            const std::string predicate =
-                '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
-            const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
-            if (instr.pset.bf == 0) {
-                const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
-                regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false,
-                                          instr.generates_cc);
-            } else {
-                const std::string value = '(' + result + ") ? 1.0 : 0.0";
-                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc);
-            }
-            break;
-        }
-        case OpCode::Type::PredicateSetPredicate: {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::PSETP: {
-                const std::string op_a =
-                    GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
-                const std::string op_b =
-                    GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
-
-                // We can't use the constant predicate as destination.
-                ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
-                const std::string second_pred =
-                    GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
-
-                const std::string combiner = GetPredicateCombiner(instr.psetp.op);
-
-                const std::string predicate =
-                    '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
-
-                // Set the primary predicate to the result of Predicate OP SecondPredicate
-                SetPredicate(instr.psetp.pred3,
-                             '(' + predicate + ") " + combiner + " (" + second_pred + ')');
-
-                if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                    // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-                    // if enabled
-                    SetPredicate(instr.psetp.pred0,
-                                 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
-                }
-                break;
-            }
-            case OpCode::Id::CSETP: {
-                const std::string pred =
-                    GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
-                const std::string combiner = GetPredicateCombiner(instr.csetp.op);
-                const std::string condition_code = regs.GetConditionCode(instr.csetp.cc);
-                if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
-                    SetPredicate(instr.csetp.pred3,
-                                 '(' + condition_code + ") " + combiner + " (" + pred + ')');
-                }
-                if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                    SetPredicate(instr.csetp.pred0,
-                                 "!(" + condition_code + ") " + combiner + " (" + pred + ')');
-                }
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
-            }
-            }
-            break;
-        }
-        case OpCode::Type::RegisterSetPredicate: {
-            UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
+    std::string Logical2HLessEqual(Operation operation) {
+        return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat,
+                                  Type::HalfFloat);
+    }
 
-            const std::string apply_mask = [&]() {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::R2P_IMM:
-                    return std::to_string(instr.r2p.immediate_mask);
-                default:
-                    UNREACHABLE();
-                    return std::to_string(instr.r2p.immediate_mask);
-                }
-            }();
-            const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
-                                     " >> " + std::to_string(instr.r2p.byte) + ')';
+    std::string Logical2HGreaterThan(Operation operation) {
+        return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat,
+                                  Type::HalfFloat);
+    }
 
-            constexpr u64 programmable_preds = 7;
-            for (u64 pred = 0; pred < programmable_preds; ++pred) {
-                const auto shift = std::to_string(1 << pred);
+    std::string Logical2HNotEqual(Operation operation) {
+        return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat,
+                                  Type::HalfFloat);
+    }
 
-                shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {");
-                ++shader.scope;
+    std::string Logical2HGreaterEqual(Operation operation) {
+        return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat,
+                                  Type::HalfFloat);
+    }
 
-                SetPredicate(pred, '(' + mask + " & " + shift + ") != 0");
+    std::string Texture(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
 
-                --shader.scope;
-                shader.AddLine('}');
-            }
-            break;
+        std::string expr = GenerateTexture(
+            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
+        if (meta->sampler.IsShadow()) {
+            expr = "vec4(" + expr + ')';
         }
-        case OpCode::Type::FloatSet: {
-            const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8),
-                                                      instr.fset.abs_a != 0, instr.fset.neg_a != 0);
-
-            std::string op_b;
-
-            if (instr.is_b_imm) {
-                const std::string imm = GetImmediate19(instr);
-                op_b = imm;
-            } else {
-                if (instr.is_b_gpr) {
-                    op_b = regs.GetRegisterAsFloat(instr.gpr20);
-                } else {
-                    op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                           GLSLRegister::Type::Float);
-                }
-            }
-
-            op_b = GetOperandAbsNeg(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
-
-            // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
-            // condition is true, and to 0 otherwise.
-            const std::string second_pred =
-                GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
-
-            const std::string combiner = GetPredicateCombiner(instr.fset.op);
+        return expr + GetSwizzle(meta->element);
+    }
 
-            const std::string predicate = "((" +
-                                          GetPredicateComparison(instr.fset.cond, op_a, op_b) +
-                                          ") " + combiner + " (" + second_pred + "))";
+    std::string TextureLod(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
 
-            if (instr.fset.bf) {
-                regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false,
-                                        instr.generates_cc);
-            } else {
-                regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
-                                          1, false, instr.generates_cc);
-            }
-            break;
+        std::string expr = GenerateTexture(
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
+        if (meta->sampler.IsShadow()) {
+            expr = "vec4(" + expr + ')';
         }
-        case OpCode::Type::IntegerSet: {
-            const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+        return expr + GetSwizzle(meta->element);
+    }
 
-            std::string op_b;
+    std::string TextureGather(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
 
-            if (instr.is_b_imm) {
-                op_b = std::to_string(instr.alu.GetSignedImm20_20());
-            } else {
-                if (instr.is_b_gpr) {
-                    op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed);
-                } else {
-                    op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                           GLSLRegister::Type::Integer);
-                }
-            }
-
-            // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
-            // condition is true, and to 0 otherwise.
-            const std::string second_pred =
-                GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
+        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
+        return GenerateTexture(operation, "Gather",
+                               {TextureArgument{type, meta->component}, TextureAoffi{}}) +
+               GetSwizzle(meta->element);
+    }
 
-            const std::string combiner = GetPredicateCombiner(instr.iset.op);
+    std::string TextureQueryDimensions(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
 
-            const std::string predicate = "((" +
-                                          GetPredicateComparison(instr.iset.cond, op_a, op_b) +
-                                          ") " + combiner + " (" + second_pred + "))";
+        const std::string sampler = GetSampler(meta->sampler);
+        const std::string lod = VisitOperand(operation, 0, Type::Int);
 
-            if (instr.iset.bf) {
-                regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
-            } else {
-                regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
-                                          1);
-            }
-            break;
+        switch (meta->element) {
+        case 0:
+        case 1:
+            return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
+                   GetSwizzle(meta->element) + "))";
+        case 2:
+            return "0";
+        case 3:
+            return "itof(textureQueryLevels(" + sampler + "))";
         }
-        case OpCode::Type::HalfSet: {
-            UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
-
-            const std::string op_a =
-                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a,
-                             instr.hset2.abs_a != 0, instr.hset2.negate_a != 0);
-
-            const std::string op_b = [&]() {
-                switch (opcode->get().GetId()) {
-                case OpCode::Id::HSET2_R:
-                    return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
-                                        instr.hset2.type_b, instr.hset2.abs_b != 0,
-                                        instr.hset2.negate_b != 0);
-                default:
-                    UNREACHABLE();
-                    return std::string("vec2(0)");
-                }
-            }();
-
-            const std::string second_pred =
-                GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0);
-
-            const std::string combiner = GetPredicateCombiner(instr.hset2.op);
-
-            // HSET2 operates on each half float in the pack.
-            std::string result;
-            for (int i = 0; i < 2; ++i) {
-                const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000";
-                const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000";
-                const std::string value = instr.hset2.bf == 1 ? float_value : integer_value;
+        UNREACHABLE();
+        return "0";
+    }
 
-                const std::string comp = std::string(".") + "xy"[i];
-                const std::string predicate =
-                    "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) +
-                    ") " + combiner + " (" + second_pred + "))";
+    std::string TextureQueryLod(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
 
-                result += '(' + predicate + " ? " + value + " : 0)";
-                if (i == 0) {
-                    result += " | ";
-                }
-            }
-            regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1);
-            break;
+        if (meta->element < 2) {
+            return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
+                   GetSwizzle(meta->element) + "))";
         }
-        case OpCode::Type::Xmad: {
-            UNIMPLEMENTED_IF(instr.xmad.sign_a);
-            UNIMPLEMENTED_IF(instr.xmad.sign_b);
-            UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                 "Condition codes generation in XMAD is partially implemented");
-
-            std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
-            std::string op_b;
-            std::string op_c;
-
-            // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
-            UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
-            const bool is_signed{instr.xmad.sign_a == 1};
-
-            bool is_merge{};
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::XMAD_CR: {
-                is_merge = instr.xmad.merge_56;
-                op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                        instr.xmad.sign_b ? GLSLRegister::Type::Integer
-                                                          : GLSLRegister::Type::UnsignedInteger);
-                op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
-                break;
-            }
-            case OpCode::Id::XMAD_RR: {
-                is_merge = instr.xmad.merge_37;
-                op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
-                op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
-                break;
-            }
-            case OpCode::Id::XMAD_RC: {
-                op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
-                op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
-                                        is_signed ? GLSLRegister::Type::Integer
-                                                  : GLSLRegister::Type::UnsignedInteger);
-                break;
-            }
-            case OpCode::Id::XMAD_IMM: {
-                is_merge = instr.xmad.merge_37;
-                op_b += std::to_string(instr.xmad.imm20_16);
-                op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
-            }
-            }
-
-            // TODO(bunnei): Ensure this is right with signed operands
-            if (instr.xmad.high_a) {
-                op_a = "((" + op_a + ") >> 16)";
-            } else {
-                op_a = "((" + op_a + ") & 0xFFFF)";
-            }
-
-            std::string src2 = '(' + op_b + ')'; // Preserve original source 2
-            if (instr.xmad.high_b) {
-                op_b = '(' + src2 + " >> 16)";
-            } else {
-                op_b = '(' + src2 + " & 0xFFFF)";
-            }
-
-            std::string product = '(' + op_a + " * " + op_b + ')';
-            if (instr.xmad.product_shift_left) {
-                product = '(' + product + " << 16)";
-            }
+        return "0";
+    }
 
-            switch (instr.xmad.mode) {
-            case Tegra::Shader::XmadMode::None:
-                break;
-            case Tegra::Shader::XmadMode::CLo:
-                op_c = "((" + op_c + ") & 0xFFFF)";
-                break;
-            case Tegra::Shader::XmadMode::CHi:
-                op_c = "((" + op_c + ") >> 16)";
-                break;
-            case Tegra::Shader::XmadMode::CBcc:
-                op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
-                break;
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}",
-                                  static_cast<u32>(instr.xmad.mode.Value()));
-            }
-            }
+    std::string TexelFetch(Operation operation) {
+        constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+        UNIMPLEMENTED_IF(meta->sampler.IsArray());
+        const std::size_t count = operation.GetOperandsCount();
 
-            std::string sum{'(' + product + " + " + op_c + ')'};
-            if (is_merge) {
-                sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
-            }
+        std::string expr = "texelFetch(";
+        expr += GetSampler(meta->sampler);
+        expr += ", ";
 
-            regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false,
-                                      instr.generates_cc);
-            break;
+        expr += constructors.at(operation.GetOperandsCount() - 1);
+        expr += '(';
+        for (std::size_t i = 0; i < count; ++i) {
+            expr += VisitOperand(operation, i, Type::Int);
+            const std::size_t next = i + 1;
+            if (next == count)
+                expr += ')';
+            else if (next < count)
+                expr += ", ";
         }
-        default: {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::EXIT: {
-                const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-                UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
-                                     "EXIT condition code used: {}", static_cast<u32>(cc));
-
-                if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
-                    EmitFragmentOutputsWrite();
-                }
-
-                switch (instr.flow.cond) {
-                case Tegra::Shader::FlowCondition::Always:
-                    shader.AddLine("return true;");
-                    if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
-                        // If this is an unconditional exit then just end processing here,
-                        // otherwise we have to account for the possibility of the condition
-                        // not being met, so continue processing the next instruction.
-                        offset = PROGRAM_END - 1;
-                    }
-                    break;
-
-                case Tegra::Shader::FlowCondition::Fcsm_Tr:
-                    // TODO(bunnei): What is this used for? If we assume this conditon is not
-                    // satisifed, dual vertex shaders in Farming Simulator make more sense
-                    UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
-                    break;
+        if (meta->lod) {
+            expr += ", ";
+            expr += CastOperand(Visit(meta->lod), Type::Int);
+        }
+        expr += ')';
 
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
-                                      static_cast<u32>(instr.flow.cond.Value()));
-                }
-                break;
-            }
-            case OpCode::Id::KIL: {
-                UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
+        return expr + GetSwizzle(meta->element);
+    }
 
-                const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-                UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
-                                     "KIL condition code used: {}", static_cast<u32>(cc));
+    std::string Branch(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        UNIMPLEMENTED_IF(!target);
 
-                // Enclose "discard" in a conditional, so that GLSL compilation does not complain
-                // about unexecuted instructions that may follow this.
-                shader.AddLine("if (true) {");
-                ++shader.scope;
-                shader.AddLine("discard;");
-                --shader.scope;
-                shader.AddLine("}");
+        code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue()));
+        code.AddLine("break;");
+        return {};
+    }
 
-                break;
-            }
-            case OpCode::Id::OUT_R: {
-                UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
-                                     "Stream buffer is not supported");
-                ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
-                           "OUT is expected to be used in a geometry shader.");
-
-                if (instr.out.emit) {
-                    // gpr0 is used to store the next address. Hardware returns a pointer but
-                    // we just return the next index with a cyclic cap.
-                    const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)};
-                    const std::string next = "((" + current + " + 1" + ") % " +
-                                             std::to_string(MAX_GEOMETRY_BUFFERS) + ')';
-                    shader.AddLine("emit_vertex(" + current + ");");
-                    regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1);
-                }
-                if (instr.out.cut) {
-                    shader.AddLine("EndPrimitive();");
-                }
+    std::string PushFlowStack(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        UNIMPLEMENTED_IF(!target);
 
-                break;
-            }
-            case OpCode::Id::MOV_SYS: {
-                switch (instr.sys20) {
-                case Tegra::Shader::SystemVariable::InvocationInfo: {
-                    LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
-                    regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
-                    break;
-                }
-                case Tegra::Shader::SystemVariable::Ydirection: {
-                    // Config pack's third value is Y_NEGATE's state.
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1);
-                    break;
-                }
-                default: {
-                    UNIMPLEMENTED_MSG("Unhandled system move: {}",
-                                      static_cast<u32>(instr.sys20.Value()));
-                }
-                }
-                break;
-            }
-            case OpCode::Id::ISBERD: {
-                UNIMPLEMENTED_IF(instr.isberd.o != 0);
-                UNIMPLEMENTED_IF(instr.isberd.skew != 0);
-                UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
-                UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
-                ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
-                           "ISBERD is expected to be used in a geometry shader.");
-                LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
-                regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1);
-                break;
-            }
-            case OpCode::Id::BRA: {
-                UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                                     "BRA with constant buffers are not implemented");
-
-                const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-                const u32 target = offset + instr.bra.GetBranchTarget();
-                if (cc != Tegra::Shader::ConditionCode::T) {
-                    const std::string condition_code = regs.GetConditionCode(cc);
-                    shader.AddLine("if (" + condition_code + "){");
-                    shader.scope++;
-                    shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
-                    shader.scope--;
-                    shader.AddLine('}');
-                } else {
-                    shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
-                }
-                break;
-            }
-            case OpCode::Id::IPA: {
-                const auto& attribute = instr.attribute.fmt28;
-                const auto& reg = instr.gpr0;
-
-                Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-                                                  instr.ipa.sample_mode.Value()};
-                regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index,
-                                                input_mode);
+        code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()));
+        return {};
+    }
 
-                if (instr.ipa.saturate) {
-                    regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true);
-                }
-                break;
-            }
-            case OpCode::Id::SSY: {
-                // The SSY opcode tells the GPU where to re-converge divergent execution paths, it
-                // sets the target of the jump that the SYNC instruction will make. The SSY opcode
-                // has a similar structure to the BRA opcode.
-                UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                                     "Constant buffer flow is not supported");
-
-                const u32 target = offset + instr.bra.GetBranchTarget();
-                EmitPushToFlowStack(target);
-                break;
-            }
-            case OpCode::Id::PBK: {
-                // PBK pushes to a stack the address where BRK will jump to. This shares stack with
-                // SSY but using SYNC on a PBK address will kill the shader execution. We don't
-                // emulate this because it's very unlikely a driver will emit such invalid shader.
-                UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                                     "Constant buffer PBK is not supported");
-
-                const u32 target = offset + instr.bra.GetBranchTarget();
-                EmitPushToFlowStack(target);
-                break;
-            }
-            case OpCode::Id::SYNC: {
-                const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-                UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
-                                     "SYNC condition code used: {}", static_cast<u32>(cc));
+    std::string PopFlowStack(Operation operation) { // Emits a jump to the last pushed target.
+        code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); // No underflow guard is emitted.
+        code.AddLine("break;"); // NOTE(review): presumably exits the jmp_to dispatch - confirm.
+        return {}; // Statement-only operation: no expression text is returned.
+    }
 
-                // The SYNC opcode jumps to the address previously set by the SSY opcode
-                EmitPopFromFlowStack();
-                break;
+    std::string Exit(Operation operation) {
+        if (stage != ShaderStage::Fragment) {
+            code.AddLine("return;");
+            return {};
+        }
+        const auto& used_registers = ir.GetRegisters();
+        const auto SafeGetRegister = [&](u32 reg) -> std::string {
+            // TODO(Rodrigo): Replace with contains once C++20 releases
+            if (used_registers.find(reg) != used_registers.end()) {
+                return GetRegister(reg);
             }
-            case OpCode::Id::BRK: {
-                // The BRK opcode jumps to the address previously set by the PBK opcode
-                const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-                UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
-                                     "BRK condition code used: {}", static_cast<u32>(cc));
+            return "0.0f";
+        };
 
-                EmitPopFromFlowStack();
-                break;
-            }
-            case OpCode::Id::DEPBAR: {
-                // TODO(Subv): Find out if we actually have to care about this instruction or if
-                // the GLSL compiler takes care of that for us.
-                LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
-                break;
-            }
-            case OpCode::Id::VMAD: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in VMAD is not implemented");
-
-                const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
-                const std::string op_a = GetVideoOperandA(instr);
-                const std::string op_b = GetVideoOperandB(instr);
-                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
-
-                std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
-
-                switch (instr.vmad.shr) {
-                case Tegra::Shader::VmadShr::Shr7:
-                    result = '(' + result + " >> 7)";
-                    break;
-                case Tegra::Shader::VmadShr::Shr15:
-                    result = '(' + result + " >> 15)";
-                    break;
-                }
+        UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
 
-                regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
-                                          instr.vmad.saturate, instr.vmad.cc);
-                break;
+        code.AddLine("if (alpha_test[0] != 0) {");
+        ++code.scope;
+        // We start on the register containing the alpha value in the first RT.
+        u32 current_reg = 3;
+        for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
+            // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
+            // multiple render targets are used.
+            if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
+                header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
+                header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
+                header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
+                code.AddLine(
+                    fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)));
+                current_reg += 4;
             }
-            case OpCode::Id::VSETP: {
-                const std::string op_a = GetVideoOperandA(instr);
-                const std::string op_b = GetVideoOperandB(instr);
-
-                // We can't use the constant predicate as destination.
-                ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
-                const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false);
-
-                const std::string combiner = GetPredicateCombiner(instr.vsetp.op);
-
-                const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b);
-                // Set the primary predicate to the result of Predicate OP SecondPredicate
-                SetPredicate(instr.vsetp.pred3,
-                             '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+        }
+        --code.scope;
+        code.AddLine('}');
 
-                if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                    // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-                    // if enabled
-                    SetPredicate(instr.vsetp.pred0,
-                                 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+        // Write the color outputs using the data in the shader registers, disabled
+        // rendertargets/components are skipped in the register assignment.
+        current_reg = 0;
+        for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
+            // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
+            for (u32 component = 0; component < 4; ++component) {
+                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
+                    code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
+                                             SafeGetRegister(current_reg)));
+                    ++current_reg;
                 }
-                break;
-            }
-            default: {
-                UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
-                break;
-            }
             }
-
-            break;
-        }
         }
 
-        // Close the predicate condition scope.
-        if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
-            --shader.scope;
-            shader.AddLine('}');
+        if (header.ps.omap.depth) {
+            // The depth output is always 2 registers after the last color output, and current_reg
+            // already contains one past the last color register.
+            code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';');
         }
 
-        return offset + 1;
+        code.AddLine("return;");
+        return {};
     }
 
-    /**
-     * Compiles a range of instructions from Tegra to GLSL.
-     * @param begin the offset of the starting instruction.
-     * @param end the offset where the compilation should stop (exclusive).
-     * @return the offset of the next instruction to compile. PROGRAM_END if the program
-     * terminates.
-     */
-    u32 CompileRange(u32 begin, u32 end) {
-        u32 program_counter;
-        for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
-            program_counter = CompileInstr(program_counter);
-        }
-        return program_counter;
+    std::string Discard(Operation operation) { // Emits "discard;" wrapped in "if (true)".
+        // Enclose "discard" in a conditional, so that GLSL compilation does not complain
+        // about unexecuted instructions that may follow this.
+        code.AddLine("if (true) {");
+        ++code.scope;
+        code.AddLine("discard;");
+        --code.scope;
+        code.AddLine("}");
+        return {}; // Statement-only operation: no expression text is returned.
     }
 
-    void Generate(const std::string& suffix) {
-        // Add declarations for all subroutines
-        for (const auto& subroutine : subroutines) {
-            shader.AddLine("bool " + subroutine.GetName() + "();");
-        }
-        shader.AddNewLine();
-
-        // Add the main entry point
-        shader.AddLine("bool exec_" + suffix + "() {");
-        ++shader.scope;
-        CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
-        --shader.scope;
-        shader.AddLine("}\n");
-
-        // Add definitions for all subroutines
-        for (const auto& subroutine : subroutines) {
-            std::set<u32> labels = subroutine.labels;
+    std::string EmitVertex(Operation operation) {
+        ASSERT_MSG(stage == ShaderStage::Geometry,
+                   "EmitVertex is expected to be used in a geometry shader.");
 
-            shader.AddLine("bool " + subroutine.GetName() + "() {");
-            ++shader.scope;
-
-            if (labels.empty()) {
-                if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
-                    shader.AddLine("return false;");
-                }
-            } else {
-                labels.insert(subroutine.begin);
-                shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
+        // If a geometry shader is attached, it will always flip (it's the last stage before
+        // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
+        code.AddLine("position.xy *= viewport_flip.xy;");
+        code.AddLine("gl_Position = position;");
+        code.AddLine("position.w = 1.0;");
+        code.AddLine("EmitVertex();");
+        return {};
+    }
+
+    std::string EndPrimitive(Operation operation) { // Emits a GLSL "EndPrimitive();" statement.
+        ASSERT_MSG(stage == ShaderStage::Geometry,
+                   "EndPrimitive is expected to be used in a geometry shader.");
+
+        code.AddLine("EndPrimitive();");
+        return {}; // Statement-only operation: no expression text is returned.
+    }
+
+    std::string YNegate(Operation operation) { // Returns an expression; emits no statement.
+        // Config pack's third value is Y_NEGATE's state.
+        return "uintBitsToFloat(config_pack[2])";
+    }
+
+    static constexpr OperationDecompilersArray operation_decompilers = { // Handler table. NOTE(review): presumably indexed by operation code, so order matters - confirm.
+        &GLSLDecompiler::Assign,
+
+        &GLSLDecompiler::Select,
+
+        &GLSLDecompiler::Add<Type::Float>,
+        &GLSLDecompiler::Mul<Type::Float>,
+        &GLSLDecompiler::Div<Type::Float>,
+        &GLSLDecompiler::Fma<Type::Float>,
+        &GLSLDecompiler::Negate<Type::Float>,
+        &GLSLDecompiler::Absolute<Type::Float>,
+        &GLSLDecompiler::FClamp,
+        &GLSLDecompiler::Min<Type::Float>,
+        &GLSLDecompiler::Max<Type::Float>,
+        &GLSLDecompiler::FCos,
+        &GLSLDecompiler::FSin,
+        &GLSLDecompiler::FExp2,
+        &GLSLDecompiler::FLog2,
+        &GLSLDecompiler::FInverseSqrt,
+        &GLSLDecompiler::FSqrt,
+        &GLSLDecompiler::FRoundEven,
+        &GLSLDecompiler::FFloor,
+        &GLSLDecompiler::FCeil,
+        &GLSLDecompiler::FTrunc,
+        &GLSLDecompiler::FCastInteger<Type::Int>,
+        &GLSLDecompiler::FCastInteger<Type::Uint>,
+
+        &GLSLDecompiler::Add<Type::Int>,
+        &GLSLDecompiler::Mul<Type::Int>,
+        &GLSLDecompiler::Div<Type::Int>,
+        &GLSLDecompiler::Negate<Type::Int>,
+        &GLSLDecompiler::Absolute<Type::Int>,
+        &GLSLDecompiler::Min<Type::Int>,
+        &GLSLDecompiler::Max<Type::Int>,
+
+        &GLSLDecompiler::ICastFloat,
+        &GLSLDecompiler::ICastUnsigned,
+        &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
+        &GLSLDecompiler::ILogicalShiftRight,
+        &GLSLDecompiler::IArithmeticShiftRight,
+        &GLSLDecompiler::BitwiseAnd<Type::Int>,
+        &GLSLDecompiler::BitwiseOr<Type::Int>,
+        &GLSLDecompiler::BitwiseXor<Type::Int>,
+        &GLSLDecompiler::BitwiseNot<Type::Int>,
+        &GLSLDecompiler::BitfieldInsert<Type::Int>,
+        &GLSLDecompiler::BitfieldExtract<Type::Int>,
+        &GLSLDecompiler::BitCount<Type::Int>,
+
+        &GLSLDecompiler::Add<Type::Uint>,
+        &GLSLDecompiler::Mul<Type::Uint>,
+        &GLSLDecompiler::Div<Type::Uint>,
+        &GLSLDecompiler::Min<Type::Uint>,
+        &GLSLDecompiler::Max<Type::Uint>,
+        &GLSLDecompiler::UCastFloat,
+        &GLSLDecompiler::UCastSigned,
+        &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
+        &GLSLDecompiler::UShiftRight, // Same handler fills two consecutive slots. NOTE(review):
+        &GLSLDecompiler::UShiftRight, // presumably the logical and arithmetic unsigned shifts - confirm.
+        &GLSLDecompiler::BitwiseAnd<Type::Uint>,
+        &GLSLDecompiler::BitwiseOr<Type::Uint>,
+        &GLSLDecompiler::BitwiseXor<Type::Uint>,
+        &GLSLDecompiler::BitwiseNot<Type::Uint>,
+        &GLSLDecompiler::BitfieldInsert<Type::Uint>,
+        &GLSLDecompiler::BitfieldExtract<Type::Uint>,
+        &GLSLDecompiler::BitCount<Type::Uint>,
+
+        &GLSLDecompiler::Add<Type::HalfFloat>,
+        &GLSLDecompiler::Mul<Type::HalfFloat>,
+        &GLSLDecompiler::Fma<Type::HalfFloat>,
+        &GLSLDecompiler::Absolute<Type::HalfFloat>,
+        &GLSLDecompiler::HNegate,
+        &GLSLDecompiler::HMergeF32,
+        &GLSLDecompiler::HMergeH0,
+        &GLSLDecompiler::HMergeH1,
+        &GLSLDecompiler::HPack2,
+
+        &GLSLDecompiler::LogicalAssign,
+        &GLSLDecompiler::LogicalAnd,
+        &GLSLDecompiler::LogicalOr,
+        &GLSLDecompiler::LogicalXor,
+        &GLSLDecompiler::LogicalNegate,
+        &GLSLDecompiler::LogicalPick2,
+        &GLSLDecompiler::LogicalAll2,
+        &GLSLDecompiler::LogicalAny2,
+
+        &GLSLDecompiler::LogicalLessThan<Type::Float>,
+        &GLSLDecompiler::LogicalEqual<Type::Float>,
+        &GLSLDecompiler::LogicalLessEqual<Type::Float>,
+        &GLSLDecompiler::LogicalGreaterThan<Type::Float>,
+        &GLSLDecompiler::LogicalNotEqual<Type::Float>,
+        &GLSLDecompiler::LogicalGreaterEqual<Type::Float>,
+        &GLSLDecompiler::LogicalFIsNan,
+
+        &GLSLDecompiler::LogicalLessThan<Type::Int>,
+        &GLSLDecompiler::LogicalEqual<Type::Int>,
+        &GLSLDecompiler::LogicalLessEqual<Type::Int>,
+        &GLSLDecompiler::LogicalGreaterThan<Type::Int>,
+        &GLSLDecompiler::LogicalNotEqual<Type::Int>,
+        &GLSLDecompiler::LogicalGreaterEqual<Type::Int>,
+
+        &GLSLDecompiler::LogicalLessThan<Type::Uint>,
+        &GLSLDecompiler::LogicalEqual<Type::Uint>,
+        &GLSLDecompiler::LogicalLessEqual<Type::Uint>,
+        &GLSLDecompiler::LogicalGreaterThan<Type::Uint>,
+        &GLSLDecompiler::LogicalNotEqual<Type::Uint>,
+        &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
+
+        &GLSLDecompiler::Logical2HLessThan,
+        &GLSLDecompiler::Logical2HEqual,
+        &GLSLDecompiler::Logical2HLessEqual,
+        &GLSLDecompiler::Logical2HGreaterThan,
+        &GLSLDecompiler::Logical2HNotEqual,
+        &GLSLDecompiler::Logical2HGreaterEqual,
+
+        &GLSLDecompiler::Texture,
+        &GLSLDecompiler::TextureLod,
+        &GLSLDecompiler::TextureGather,
+        &GLSLDecompiler::TextureQueryDimensions,
+        &GLSLDecompiler::TextureQueryLod,
+        &GLSLDecompiler::TexelFetch,
+
+        &GLSLDecompiler::Branch,
+        &GLSLDecompiler::PushFlowStack,
+        &GLSLDecompiler::PopFlowStack,
+        &GLSLDecompiler::Exit,
+        &GLSLDecompiler::Discard,
+
+        &GLSLDecompiler::EmitVertex,
+        &GLSLDecompiler::EndPrimitive,
+
+        &GLSLDecompiler::YNegate,
+    };
 
-                // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
-                // unlikely that shaders will use 20 nested SSYs and PBKs.
-                constexpr u32 FLOW_STACK_SIZE = 20;
-                shader.AddLine("uint flow_stack[" + std::to_string(FLOW_STACK_SIZE) + "];");
-                shader.AddLine("uint flow_stack_top = 0u;");
+    std::string GetRegister(u32 index) const { // Yields "gpr_<index>_<suffix>".
+        return GetDeclarationWithSuffix(index, "gpr");
+    }
 
-                shader.AddLine("while (true) {");
-                ++shader.scope;
+    std::string GetPredicate(Tegra::Shader::Pred pred) const { // "pred_<numeric pred>_<suffix>"
+        return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
+    }
 
-                shader.AddLine("switch (jmp_to) {");
+    std::string GetInputAttribute(Attribute::Index attribute) const {
+        const auto index{static_cast<u32>(attribute) - // Distance from Attribute_0, in u32 math.
+                         static_cast<u32>(Attribute::Index::Attribute_0)};
+        return GetDeclarationWithSuffix(index, "input_attr"); // "input_attr_<index>_<suffix>"
+    }
 
-                for (auto label : labels) {
-                    shader.AddLine("case " + std::to_string(label) + "u: {");
-                    ++shader.scope;
+    std::string GetOutputAttribute(Attribute::Index attribute) const {
+        const auto index{static_cast<u32>(attribute) - // Distance from Attribute_0, in u32 math.
+                         static_cast<u32>(Attribute::Index::Attribute_0)};
+        return GetDeclarationWithSuffix(index, "output_attr"); // "output_attr_<index>_<suffix>"
+    }
 
-                    const auto next_it = labels.lower_bound(label + 1);
-                    const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+    std::string GetConstBuffer(u32 index) const { // Yields "cbuf_<index>_<suffix>".
+        return GetDeclarationWithSuffix(index, "cbuf");
+    }
 
-                    const u32 compile_end = CompileRange(label, next_label);
-                    if (compile_end > next_label && compile_end != PROGRAM_END) {
-                        // This happens only when there is a label inside a IF/LOOP block
-                        shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
-                        labels.emplace(compile_end);
-                    }
+    std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { // "gmem_<cbuf_index>_<cbuf_offset>_<suffix>"
+        return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
+    }
 
-                    --shader.scope;
-                    shader.AddLine('}');
-                }
+    std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
+        return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
+                           suffix); // "gmem_block_<cbuf_index>_<cbuf_offset>_<suffix>"
+    }
 
-                shader.AddLine("default: return false;");
-                shader.AddLine('}');
+    std::string GetConstBufferBlock(u32 index) const { // Yields "cbuf_block_<index>_<suffix>".
+        return GetDeclarationWithSuffix(index, "cbuf_block");
+    }
 
-                --shader.scope;
-                shader.AddLine('}');
+    std::string GetLocalMemory() const { // Yields "lmem_<suffix>"; there is no index part.
+        return "lmem_" + suffix;
+    }
 
-                shader.AddLine("return false;");
-            }
+    std::string GetInternalFlag(InternalFlag flag) const {
+        constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag",
+                                                                  "carry_flag", "overflow_flag"};
+        const auto index = static_cast<u32>(flag);
+        ASSERT(index < static_cast<u32>(InternalFlag::Amount));
 
-            --shader.scope;
-            shader.AddLine("}\n");
+        return std::string(InternalFlagNames[index]) + '_' + suffix;
+    }
 
-            DEBUG_ASSERT(shader.scope == 0);
-        }
+    std::string GetSampler(const Sampler& sampler) const { // "sampler_<sampler index>_<suffix>"
+        return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
+    }
 
-        GenerateDeclarations();
+    std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
+        return name + '_' + std::to_string(index) + '_' + suffix; // "<name>_<index>_<suffix>"
     }
 
-    /// Add declarations for registers
-    void GenerateDeclarations() {
-        regs.GenerateDeclarations(suffix);
+    const ShaderIR& ir;
+    const ShaderStage stage;
+    const std::string suffix;
+    const Header header;
 
-        for (const auto& pred : declr_predicates) {
-            declarations.AddLine("bool " + pred + " = false;");
-        }
-        declarations.AddNewLine();
-    }
+    ShaderWriter code;
+};
 
-private:
-    const std::set<Subroutine>& subroutines;
-    const ProgramCode& program_code;
-    Tegra::Shader::Header header;
-    const u32 main_offset;
-    Maxwell3D::Regs::ShaderStage stage;
-    const std::string& suffix;
-    u64 local_memory_size;
-    std::size_t shader_length;
-
-    ShaderWriter shader;
-    ShaderWriter declarations;
-    GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
-
-    // Declarations
-    std::set<std::string> declr_predicates;
-}; // namespace OpenGL::GLShader::Decompiler
+} // Anonymous namespace
 
 std::string GetCommonDeclarations() {
-    return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",
-                       RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4));
+    const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
+    const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
+    return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
+           "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
+           "#define ftoi floatBitsToInt\n"
+           "#define ftou floatBitsToUint\n"
+           "#define itof intBitsToFloat\n"
+           "#define utof uintBitsToFloat\n\n"
+           "float fromHalf2(vec2 pair) {\n"
+           "    return utof(packHalf2x16(pair));\n"
+           "}\n\n"
+           "vec2 toHalf2(float value) {\n"
+           "    return unpackHalf2x16(ftou(value));\n"
+           "}\n";
 }
 
-std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
-                                              Maxwell3D::Regs::ShaderStage stage,
-                                              const std::string& suffix) {
-    try {
-        ControlFlowAnalyzer analyzer(program_code, main_offset, suffix);
-        const auto subroutines = analyzer.GetSubroutines();
-        GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix,
-                                analyzer.GetShaderLength());
-        return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
-    } catch (const DecompileFail& exception) {
-        LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
-    }
-    return {};
+ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) {
+    GLSLDecompiler decompiler(ir, stage, suffix);
+    decompiler.Decompile(); // Runs the pass; its output is collected on the next line.
+    return {decompiler.GetResult(), decompiler.GetShaderEntries()}; // (result, entries) pair.
 }
 
-} // namespace OpenGL::GLShader::Decompiler
+} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index d01a4a7ee..4e04ab2f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,21 +5,67 @@
 #pragma once
 
 #include <array>
-#include <functional>
-#include <optional>
 #include <string>
+#include <utility>
+#include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/shader/shader_ir.h"
 
-namespace OpenGL::GLShader::Decompiler {
+namespace VideoCommon::Shader {
+class ShaderIR;
+}
 
-using Tegra::Engines::Maxwell3D;
+namespace OpenGL::GLShader {
+
+struct ShaderEntries;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ProgramResult = std::pair<std::string, ShaderEntries>;
+using SamplerEntry = VideoCommon::Shader::Sampler;
+
+class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { // ConstBuffer plus an index.
+public:
+    explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
+        : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
+
+    u32 GetIndex() const { // Returns the index given at construction.
+        return index;
+    }
+
+private:
+    u32 index{}; // NOTE(review): presumably the const buffer slot this entry describes - confirm.
+};
+
+class GlobalMemoryEntry { // Holds a (cbuf_index, cbuf_offset) pair; exposes getters only.
+public:
+    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+
+    u32 GetCbufIndex() const { // Value passed as cbuf_index at construction.
+        return cbuf_index;
+    }
+
+    u32 GetCbufOffset() const { // Value passed as cbuf_offset at construction.
+        return cbuf_offset;
+    }
+
+private:
+    u32 cbuf_index{};
+    u32 cbuf_offset{};
+};
+
+struct ShaderEntries { // Second element of the ProgramResult pair.
+    std::vector<ConstBufferEntry> const_buffers;
+    std::vector<SamplerEntry> samplers;
+    std::vector<GlobalMemoryEntry> global_memory_entries;
+    std::array<bool, Maxwell::NumClipDistances> clip_distances{}; // Value-initialized: all false.
+    std::size_t shader_length{}; // Value-initialized to 0. NOTE(review): unit not visible here.
+};
 
 std::string GetCommonDeclarations();
 
-std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
-                                              Maxwell3D::Regs::ShaderStage stage,
-                                              const std::string& suffix);
+ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
+                        const std::string& suffix); // Returns a (std::string, ShaderEntries) pair.
 
-} // namespace OpenGL::GLShader::Decompiler
+} // namespace OpenGL::GLShader
+\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
new file mode 100644
index 000000000..8a43eb157
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -0,0 +1,624 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/common_paths.h"
+#include "common/common_types.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/scm_rev.h"
+#include "common/zstd_compression.h"
+
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
+#include "core/settings.h"
+
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+
+namespace OpenGL {
+
+using ShaderCacheVersionHash = std::array<u8, 64>; // Header of the precompiled file
+
+enum class TransferableEntryKind : u32 { // u32 tag preceding each transferable entry
+    Raw, // Followed by a ShaderDiskCacheRaw
+    Usage, // Followed by a ShaderDiskCacheUsage
+};
+
+enum class PrecompiledEntryKind : u32 { // u32 tag preceding each precompiled entry
+    Decompiled, // Followed by an id and a decompiled shader entry
+    Dump, // Followed by a usage and a compressed program binary
+};
+
+constexpr u32 NativeVersion = 1; // First u32 of the transferable file
+
+// These structs are written to disk as raw bytes, so make sure their sizes don't change
+static_assert(sizeof(BaseBindings) == 12);
+static_assert(sizeof(ShaderDiskCacheUsage) == 24);
+
+namespace {
+
+ShaderCacheVersionHash GetShaderCacheVersionHash() { // Version string as a fixed 64-byte array
+    ShaderCacheVersionHash hash{}; // Zero-filled, so shorter strings end up zero padded
+    const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
+    std::memcpy(hash.data(), Common::g_shader_cache_version, length); // Longer ones are cut off
+    return hash;
+}
+
+} // namespace
+
+ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                                       u32 program_code_size, u32 program_code_size_b,
+                                       ProgramCode program_code, ProgramCode program_code_b)
+    : unique_identifier{unique_identifier}, program_type{program_type},
+      program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
+      program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
+// NOTE: the size arguments above are stored as given, they are not checked against the vectors.
+ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
+
+ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
+
+bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
+    if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
+        file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
+        return false;
+    }
+    // The sizes are read straight into the members. Same-named locals would shadow them and
+    // leave the members at zero, so a later Save() of this entry would write no program code.
+    if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
+        file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
+        return false;
+    }
+    // Sizes count u64 elements, not bytes (ProgramCode is a std::vector<u64>)
+    program_code.resize(program_code_size);
+    program_code_b.resize(program_code_size_b);
+
+    if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
+        return false;
+
+    if (HasProgramA() &&
+        file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+        return false;
+    }
+    return true;
+}
+
+bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
+    if (file.WriteObject(unique_identifier) != 1 ||
+        file.WriteObject(static_cast<u32>(program_type)) != 1 ||
+        file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
+        return false;
+    }
+    // Element counts come from the size members rather than from the vectors themselves
+    if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
+        return false;
+    // The second program is only written for VertexA entries, mirroring Load()
+    if (HasProgramA() &&
+        file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+        return false;
+    }
+    return true;
+}
+
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} // No I/O
+
+std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
+ShaderDiskCacheOpenGL::LoadTransferable() { // An empty optional means nothing was loaded
+    // Skip games without title id
+    const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+    if (!Settings::values.use_disk_shader_cache || !has_title_id)
+        return {};
+    tried_to_load = true; // From here on IsUsable() can succeed, even if loading fails below
+
+    FileUtil::IOFile file(GetTransferablePath(), "rb");
+    if (!file.IsOpen()) {
+        LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
+                 GetTitleID());
+        return {};
+    }
+
+    u32 version{};
+    if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
+        LOG_ERROR(Render_OpenGL,
+                  "Failed to get transferable cache version for title id={} - skipping",
+                  GetTitleID());
+        return {};
+    }
+
+    if (version < NativeVersion) {
+        LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
+        file.Close();
+        InvalidateTransferable();
+        return {};
+    }
+    if (version > NativeVersion) {
+        LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
+                                   "of the emulator - skipping");
+        return {};
+    }
+
+    // Version is valid, load the shaders
+    std::vector<ShaderDiskCacheRaw> raws;
+    std::vector<ShaderDiskCacheUsage> usages;
+    while (file.Tell() < file.GetSize()) {
+        TransferableEntryKind kind{}; // Every entry starts with a u32 kind tag
+        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+            LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
+            return {};
+        }
+
+        switch (kind) {
+        case TransferableEntryKind::Raw: {
+            ShaderDiskCacheRaw entry;
+            if (!entry.Load(file)) {
+                LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
+                return {};
+            }
+            transferable.insert({entry.GetUniqueIdentifier(), {}}); // Lets SaveRaw() skip it
+            raws.push_back(std::move(entry));
+            break;
+        }
+        case TransferableEntryKind::Usage: {
+            ShaderDiskCacheUsage usage{};
+            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
+                LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
+                return {};
+            }
+            usages.push_back(std::move(usage));
+            break;
+        }
+        default:
+            LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
+                      static_cast<u32>(kind));
+            return {};
+        }
+    }
+    return {{std::move(raws), std::move(usages)}}; // Moved, not copied, into the result
+}
+
+std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+          std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+ShaderDiskCacheOpenGL::LoadPrecompiled() { // Empty maps mean nothing usable was found
+    if (!IsUsable())
+        return {};
+
+    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+    if (!file.IsOpen()) {
+        LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
+                 GetTitleID());
+        return {};
+    }
+    // Not const: a const optional would force a deep copy of both maps on return
+    auto result = LoadPrecompiledFile(file);
+    if (!result) {
+        LOG_INFO(Render_OpenGL,
+                 "Failed to load precompiled cache for game with title id={} - removing",
+                 GetTitleID());
+        file.Close();
+        InvalidatePrecompiled();
+        return {};
+    }
+    return std::move(*result);
+}
+
+std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+                        std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+    ShaderCacheVersionHash file_hash{}; // The file starts with the writer's version hash
+    if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
+        return {};
+    }
+    if (GetShaderCacheVersionHash() != file_hash) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
+        return {};
+    }
+
+    std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
+    std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
+    while (file.Tell() < file.GetSize()) {
+        PrecompiledEntryKind kind{};
+        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+            return {};
+        }
+
+        switch (kind) {
+        case PrecompiledEntryKind::Decompiled: {
+            u64 unique_identifier{};
+            if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
+                return {};
+            // Not const, so that the entry can really be moved into the map below
+            auto entry = LoadDecompiledEntry(file);
+            if (!entry)
+                return {};
+            decompiled.insert({unique_identifier, std::move(*entry)});
+            break;
+        }
+        case PrecompiledEntryKind::Dump: {
+            ShaderDiskCacheUsage usage;
+            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
+                return {};
+
+            ShaderDiskCacheDump dump;
+            if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
+                return {};
+
+            u32 binary_length{};
+            u32 compressed_size{};
+            if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
+                file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
+                return {};
+            }
+
+            std::vector<u8> compressed_binary(compressed_size);
+            if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
+                compressed_binary.size()) {
+                return {};
+            }
+            // The decompressed size must agree with the length recorded by SaveDump()
+            dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
+            if (dump.binary.empty() || dump.binary.size() != binary_length) {
+                return {};
+            }
+
+            dumps.emplace(usage, std::move(dump));
+            break;
+        }
+        default:
+            return {};
+        }
+    }
+    return {{std::move(decompiled), std::move(dumps)}};
+}
+
+std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
+    FileUtil::IOFile& file) { // Reads the fields in the order SaveDecompiledFile() wrote them
+    u32 code_size{};
+    u32 compressed_code_size{};
+    if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
+        file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
+        return {};
+    }
+
+    std::vector<u8> compressed_code(compressed_code_size);
+    if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+        return {};
+    }
+    // A code_size that mismatches the decompressed data would overrun the string copy below
+    const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
+    if (code.empty() || code.size() != code_size) {
+        return {};
+    }
+    ShaderDiskCacheDecompiled entry;
+    entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+
+    u32 const_buffers_count{};
+    if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < const_buffers_count; ++i) {
+        u32 max_offset{};
+        u32 index{};
+        u8 is_indirect{};
+        if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
+            return {};
+        }
+        entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+    }
+
+    u32 samplers_count{};
+    if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < samplers_count; ++i) {
+        u64 offset{};
+        u64 index{};
+        u32 type{};
+        u8 is_array{};
+        u8 is_shadow{};
+        if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
+            file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
+            file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
+            file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
+            return {};
+        }
+        entry.entries.samplers.emplace_back(
+            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+            static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+    }
+
+    u32 global_memory_count{};
+    if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < global_memory_count; ++i) {
+        u32 cbuf_index{};
+        u32 cbuf_offset{};
+        if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+            return {};
+        }
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+    }
+    // Clip distances have a fixed count, so the file stores one u8 flag each and no count
+    for (auto& clip_distance : entry.entries.clip_distances) {
+        u8 clip_distance_raw{};
+        if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
+            return {};
+        clip_distance = clip_distance_raw != 0;
+    }
+
+    u64 shader_length{};
+    if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
+        return {};
+    entry.entries.shader_length = static_cast<std::size_t>(shader_length);
+
+    return entry;
+}
+
+bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
+                                               const std::string& code,
+                                               const std::vector<u8>& compressed_code,
+                                               const GLShader::ShaderEntries& entries) {
+    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
+        file.WriteObject(unique_identifier) != 1 ||
+        file.WriteObject(static_cast<u32>(code.size())) != 1 || // Only the size of code is used
+        file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
+        file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+        return false;
+    }
+    // Lists are written as a u32 count plus elements, the layout LoadDecompiledEntry() reads
+    if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
+        return false;
+    for (const auto& cbuf : entries.const_buffers) {
+        if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
+            file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
+            file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
+            return false;
+        }
+    }
+
+    if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
+        return false;
+    for (const auto& sampler : entries.samplers) {
+        if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
+            file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
+            file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
+            return false;
+        }
+    }
+
+    if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
+        return false;
+    for (const auto& gmem : entries.global_memory_entries) {
+        if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
+            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+            return false;
+        }
+    }
+    // Clip distances have a fixed count, so no count is written for them
+    for (const bool clip_distance : entries.clip_distances) {
+        if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
+            return false;
+    }
+
+    return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
+}
+
+void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
+    if (!FileUtil::Delete(GetTransferablePath())) {
+        LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
+                  GetTransferablePath());
+    }
+    InvalidatePrecompiled(); // Runs even when the delete above failed
+}
+
+void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const { // Deletes the precompiled file
+    if (!FileUtil::Delete(GetPrecompiledPath())) {
+        LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
+    }
+}
+
+void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
+    if (!IsUsable())
+        return;
+
+    const u64 id = entry.GetUniqueIdentifier();
+    if (transferable.find(id) != transferable.end()) {
+        // The shader already exists
+        return;
+    }
+
+    FileUtil::IOFile file = AppendTransferableFile();
+    if (!file.IsOpen())
+        return;
+    if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
+        LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
+        file.Close(); // Closed before the file gets deleted
+        InvalidateTransferable();
+        return;
+    }
+    transferable.insert({id, {}}); // Only remembered once the write succeeded
+}
+
+void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
+    if (!IsUsable())
+        return;
+
+    const auto it = transferable.find(usage.unique_identifier);
+    ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
+    // NOTE(review): `it` is dereferenced below even if the assert fired - confirm ASSERT aborts
+    auto& usages{it->second};
+    ASSERT(usages.find(usage) == usages.end());
+    usages.insert(usage); // Recorded in memory before it reaches the file
+
+    FileUtil::IOFile file = AppendTransferableFile();
+    if (!file.IsOpen())
+        return;
+
+    if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
+        LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
+        file.Close();
+        InvalidateTransferable();
+        return;
+    }
+}
+
+void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
+                                           const GLShader::ShaderEntries& entries) {
+    if (!IsUsable())
+        return;
+    // Compress first, so that nothing is appended to the file when compression fails
+    const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
+        reinterpret_cast<const u8*>(code.data()), code.size())};
+    if (compressed_code.empty()) {
+        LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
+                  unique_identifier);
+        return;
+    }
+
+    FileUtil::IOFile file = AppendPrecompiledFile();
+    if (!file.IsOpen())
+        return;
+
+    if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
+        LOG_ERROR(Render_OpenGL,
+                  "Failed to save decompiled entry to the precompiled file - removing");
+        file.Close();
+        InvalidatePrecompiled();
+    }
+}
+
+void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+    if (!IsUsable())
+        return;
+
+    GLint binary_length{};
+    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
+    if (binary_length <= 0) // No binary to dump; an empty one would fail LoadPrecompiledFile()
+        return;
+    GLenum binary_format{};
+    std::vector<u8> binary(binary_length);
+    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
+    const std::vector<u8> compressed_binary =
+        Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
+
+    if (compressed_binary.empty()) {
+        LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
+                  usage.unique_identifier);
+        return;
+    }
+
+    FileUtil::IOFile file = AppendPrecompiledFile();
+    if (!file.IsOpen())
+        return;
+
+    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
+        file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
+        file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
+        file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
+        file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
+            compressed_binary.size()) {
+        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
+                  usage.unique_identifier);
+        file.Close();
+        InvalidatePrecompiled();
+        return;
+    }
+}
+
+bool ShaderDiskCacheOpenGL::IsUsable() const { // tried_to_load is set by LoadTransferable()
+    return tried_to_load && Settings::values.use_disk_shader_cache;
+}
+
+FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+    if (!EnsureDirectories())
+        return {};
+
+    const auto transferable_path{GetTransferablePath()};
+    const bool existed = FileUtil::Exists(transferable_path); // Checked before "ab" creates it
+
+    FileUtil::IOFile file(transferable_path, "ab");
+    if (!file.IsOpen()) {
+        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
+        return {};
+    }
+    if (!existed || file.GetSize() == 0) {
+        // A new or empty file gets the version header first
+        if (file.WriteObject(NativeVersion) != 1) {
+            LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
+                      transferable_path);
+            return {};
+        }
+    }
+    return file;
+}
+
+FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
+    if (!EnsureDirectories())
+        return {};
+
+    const auto precompiled_path{GetPrecompiledPath()};
+    const bool existed = FileUtil::Exists(precompiled_path); // Checked before "ab" creates it
+
+    FileUtil::IOFile file(precompiled_path, "ab");
+    if (!file.IsOpen()) {
+        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
+        return {};
+    }
+    // A new or empty file gets the version hash that LoadPrecompiledFile() checks first
+    if (!existed || file.GetSize() == 0) {
+        const auto hash{GetShaderCacheVersionHash()};
+        if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
+            LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
+                      precompiled_path);
+            return {};
+        }
+    }
+    return file;
+}
+
+bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
+    const auto CreateDir = [](const std::string& dir) { // Logs and returns false on failure
+        if (!FileUtil::CreateDir(dir)) {
+            LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
+            return false;
+        }
+        return true;
+    };
+    // Parents come first; && stops at the first directory that could not be created
+    return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
+           CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
+           CreateDir(GetPrecompiledDir());
+}
+
+std::string ShaderDiskCacheOpenGL::GetTransferablePath() const { // <transferable dir>/<id>.bin
+    return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+}
+
+std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const { // <precompiled dir>/<id>.bin
+    return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+}
+
+std::string ShaderDiskCacheOpenGL::GetTransferableDir() const { // Subdirectory of GetBaseDir()
+    return GetBaseDir() + DIR_SEP "transferable";
+}
+
+std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const { // Subdirectory of GetBaseDir()
+    return GetBaseDir() + DIR_SEP "precompiled";
+}
+
+std::string ShaderDiskCacheOpenGL::GetBaseDir() const { // "opengl" under the user shader dir
+    return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
+}
+
+std::string ShaderDiskCacheOpenGL::GetTitleID() const { // 16 upper-case, zero-padded hex digits
+    return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
new file mode 100644
index 000000000..6be0c0547
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -0,0 +1,245 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+
+namespace Core {
+class System;
+}
+
+namespace FileUtil {
+class IOFile;
+}
+
+namespace OpenGL {
+
+using ProgramCode = std::vector<u64>;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+/// Allocated bindings used by an OpenGL shader program
+struct BaseBindings { // Three u32 counters; their raw bytes are written to the cache files
+    u32 cbuf{};
+    u32 gmem{};
+    u32 sampler{};
+
+    bool operator==(const BaseBindings& rhs) const { // Member-wise equality
+        return cbuf == rhs.cbuf && gmem == rhs.gmem && sampler == rhs.sampler;
+    }
+
+    bool operator!=(const BaseBindings& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+/// Describes how a shader is used
+struct ShaderDiskCacheUsage { // Stored as raw bytes in the transferable and precompiled files
+    u64 unique_identifier{};
+    BaseBindings bindings;
+    GLenum primitive{};
+
+    bool operator==(const ShaderDiskCacheUsage& rhs) const { // Member-wise equality
+        return unique_identifier == rhs.unique_identifier && bindings == rhs.bindings &&
+               primitive == rhs.primitive;
+    }
+
+    bool operator!=(const ShaderDiskCacheUsage& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+} // namespace OpenGL
+
+namespace std {
+
+template <>
+struct hash<OpenGL::BaseBindings> { // Lets BaseBindings take part in hashed keys
+    std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+        return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; // Bits may overlap
+    }
+};
+
+template <>
+struct hash<OpenGL::ShaderDiskCacheUsage> { // Needed by the unordered containers keyed on usage
+    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+        return static_cast<std::size_t>(usage.unique_identifier) ^ // << binds tighter than ^
+               std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
+    }
+};
+
+} // namespace std
+
+namespace OpenGL {
+
+/// Describes a shader as it is used by the guest GPU
+class ShaderDiskCacheRaw {
+public:
+    explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                                u32 program_code_size, u32 program_code_size_b,
+                                ProgramCode program_code, ProgramCode program_code_b);
+    ShaderDiskCacheRaw();
+    ~ShaderDiskCacheRaw();
+    /// Reads this entry from the file. Returns false when any read comes up short.
+    bool Load(FileUtil::IOFile& file);
+    /// Writes this entry to the file. Returns false when any write fails.
+    bool Save(FileUtil::IOFile& file) const;
+
+    u64 GetUniqueIdentifier() const {
+        return unique_identifier;
+    }
+    /// True for VertexA entries, the only ones whose second program is loaded and saved.
+    bool HasProgramA() const {
+        return program_type == Maxwell::ShaderProgram::VertexA;
+    }
+
+    Maxwell::ShaderProgram GetProgramType() const {
+        return program_type;
+    }
+    /// Maps the program type to its stage; VertexA and VertexB both map to Vertex.
+    Maxwell::ShaderStage GetProgramStage() const {
+        switch (program_type) {
+        case Maxwell::ShaderProgram::VertexA:
+        case Maxwell::ShaderProgram::VertexB:
+            return Maxwell::ShaderStage::Vertex;
+        case Maxwell::ShaderProgram::TesselationControl:
+            return Maxwell::ShaderStage::TesselationControl;
+        case Maxwell::ShaderProgram::TesselationEval:
+            return Maxwell::ShaderStage::TesselationEval;
+        case Maxwell::ShaderProgram::Geometry:
+            return Maxwell::ShaderStage::Geometry;
+        case Maxwell::ShaderProgram::Fragment:
+            return Maxwell::ShaderStage::Fragment;
+        }
+        UNREACHABLE(); // NOTE(review): no return follows; assumes this never returns - confirm
+    }
+
+    const ProgramCode& GetProgramCode() const {
+        return program_code;
+    }
+
+    const ProgramCode& GetProgramCodeB() const {
+        return program_code_b;
+    }
+
+private:
+    u64 unique_identifier{};
+    Maxwell::ShaderProgram program_type{};
+    u32 program_code_size{}; // Element count Save() writes for program_code
+    u32 program_code_size_b{}; // Element count Save() writes for program_code_b
+
+    ProgramCode program_code;
+    ProgramCode program_code_b;
+};
+
+/// Contains decompiled data from a shader
+struct ShaderDiskCacheDecompiled {
+    std::string code; // Stored zstd-compressed in the precompiled file
+    GLShader::ShaderEntries entries;
+};
+
+/// Contains an OpenGL dumped binary program
+struct ShaderDiskCacheDump {
+    GLenum binary_format{}; // Value-initialized like every other member in this header
+    std::vector<u8> binary; // Stored zstd-compressed in the precompiled file
+};
+
+class ShaderDiskCacheOpenGL {
+public:
+    explicit ShaderDiskCacheOpenGL(Core::System& system);
+
+    /// Loads the transferable cache. An old-version file is deleted; other failures are skipped.
+    std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
+    LoadTransferable();
+
+    /// Loads current game's precompiled cache. Invalidates on failure.
+    std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+              std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+    LoadPrecompiled();
+
+    /// Removes the transferable (and precompiled) cache file.
+    void InvalidateTransferable() const;
+
+    /// Removes the precompiled cache file.
+    void InvalidatePrecompiled() const;
+
+    /// Saves a raw dump to the transferable file. Checks for collisions.
+    void SaveRaw(const ShaderDiskCacheRaw& entry);
+
+    /// Saves shader usage to the transferable file. Does not check for collisions.
+    void SaveUsage(const ShaderDiskCacheUsage& usage);
+
+    /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
+    void SaveDecompiled(u64 unique_identifier, const std::string& code,
+                        const GLShader::ShaderEntries& entries);
+
+    /// Saves a dump entry to the precompiled file. Does not check for collisions.
+    void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
+
+private:
+    /// Loads the precompiled cache from the passed file. Returns empty on failure.
+    std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+                            std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+    LoadPrecompiledFile(FileUtil::IOFile& file);
+
+    /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
+    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
+
+    /// Saves a decompiled entry to the passed file. Returns true on success.
+    bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
+                            const std::vector<u8>& compressed_code,
+                            const GLShader::ShaderEntries& entries);
+
+    /// Returns if the cache can be used
+    bool IsUsable() const;
+
+    /// Opens current game's transferable file for appending, writing its header if it is new
+    FileUtil::IOFile AppendTransferableFile() const;
+
+    /// Opens current game's precompiled file for appending, writing its header if it is new
+    FileUtil::IOFile AppendPrecompiledFile() const;
+
+    /// Create shader disk cache directories. Returns true on success.
+    bool EnsureDirectories() const;
+
+    /// Gets current game's transferable file path
+    std::string GetTransferablePath() const;
+
+    /// Gets current game's precompiled file path
+    std::string GetPrecompiledPath() const;
+
+    /// Get user's transferable directory path
+    std::string GetTransferableDir() const;
+
+    /// Get user's precompiled directory path
+    std::string GetPrecompiledDir() const;
+
+    /// Get user's shader directory path
+    std::string GetBaseDir() const;
+
+    /// Get current game's title id
+    std::string GetTitleID() const;
+
+    // Core system
+    Core::System& system;
+    // Stored transferable shaders (unordered: only find/insert are used, <map> is not included)
+    std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+    // Set once LoadTransferable() got past its settings and title id checks
+    bool tried_to_load{};
+};
+
+} // namespace OpenGL
+\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 5d0819dc5..8763d9c71 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,67 +3,60 @@
 // Refer to the license.txt file included.
 
 #include <fmt/format.h>
-#include "common/assert.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/shader/shader_ir.h"
 
 namespace OpenGL::GLShader {
 
 using Tegra::Engines::Maxwell3D;
+using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::ShaderIR;
 
 static constexpr u32 PROGRAM_OFFSET{10};
 
 ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
-    std::string out = "#version 430 core\n";
-    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+
+    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
     out += "// Shader Unique Id: VS" + id + "\n\n";
-    out += Decompiler::GetCommonDeclarations();
+    out += GetCommonDeclarations();
 
     out += R"(
-
 layout (location = 0) out vec4 position;
 
-layout(std140) uniform vs_config {
+layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
     vec4 viewport_flip;
     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
     uvec4 alpha_test;
 };
-)";
-
-    if (setup.IsDualProgram()) {
-        out += "bool exec_vertex_b();\n";
-    }
 
-    ProgramResult program =
-        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
-                                     Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
-            .value_or(ProgramResult());
+)";
+    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
 
     out += program.first;
 
     if (setup.IsDualProgram()) {
+        ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
         ProgramResult program_b =
-            Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
-                                         Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
-                .value_or(ProgramResult());
+            Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
+
         out += program_b.first;
     }
 
     out += R"(
-
 void main() {
     position = vec4(0.0, 0.0, 0.0, 0.0);
-    exec_vertex();
+    execute_vertex();
 )";
 
     if (setup.IsDualProgram()) {
-        out += "    exec_vertex_b();";
+        out += "    execute_vertex_b();";
     }
 
     out += R"(
-
     // Check if the flip stage is VertexB
     // Config pack's second value is flip_stage
     if (config_pack[1] == 1) {
@@ -77,73 +70,62 @@ void main() {
     if (config_pack[1] == 1) {
         position.w = 1.0;
     }
-}
-
-)";
+})";
 
     return {out, program.second};
 }
 
 ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
-    // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
-    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+
+    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
     out += "// Shader Unique Id: GS" + id + "\n\n";
-    out += Decompiler::GetCommonDeclarations();
-    out += "bool exec_geometry();\n";
+    out += GetCommonDeclarations();
 
-    ProgramResult program =
-        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
-                                     Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
-            .value_or(ProgramResult());
     out += R"(
-out gl_PerVertex {
-    vec4 gl_Position;
-};
-
 layout (location = 0) in vec4 gs_position[];
 layout (location = 0) out vec4 position;
 
-layout (std140) uniform gs_config {
+layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
     vec4 viewport_flip;
     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
     uvec4 alpha_test;
 };
 
-void main() {
-    exec_geometry();
-}
-
 )";
+    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    ProgramResult program =
+        Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
     out += program.first;
+
+    out += R"(
+void main() {
+    execute_geometry();
+})"; // no ';' after main(): stray global-scope semicolons are invalid before GLSL 4.60
+
     return {out, program.second};
 }
 
 ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
-    std::string out = "#version 430 core\n";
-    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+
+    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
     out += "// Shader Unique Id: FS" + id + "\n\n";
-    out += Decompiler::GetCommonDeclarations();
-    out += "bool exec_fragment();\n";
+    out += GetCommonDeclarations();
 
-    ProgramResult program =
-        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
-                                     Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
-            .value_or(ProgramResult());
     out += R"(
-layout(location = 0) out vec4 FragColor0;
-layout(location = 1) out vec4 FragColor1;
-layout(location = 2) out vec4 FragColor2;
-layout(location = 3) out vec4 FragColor3;
-layout(location = 4) out vec4 FragColor4;
-layout(location = 5) out vec4 FragColor5;
-layout(location = 6) out vec4 FragColor6;
-layout(location = 7) out vec4 FragColor7;
-
-layout (location = 0) in vec4 position;
-
-layout (std140) uniform fs_config {
+layout (location = 0) out vec4 FragColor0;
+layout (location = 1) out vec4 FragColor1;
+layout (location = 2) out vec4 FragColor2;
+layout (location = 3) out vec4 FragColor3;
+layout (location = 4) out vec4 FragColor4;
+layout (location = 5) out vec4 FragColor5;
+layout (location = 6) out vec4 FragColor6;
+layout (location = 7) out vec4 FragColor7;
+
+layout (location = 0) in noperspective vec4 position;
+
+layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
     vec4 viewport_flip;
     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
     uvec4 alpha_test;
@@ -173,12 +155,20 @@ bool AlphaFunc(in float value) {
     }
 }
 
+)";
+    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    ProgramResult program =
+        Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
+
+    out += program.first;
+
+    out += R"(
 void main() {
-    exec_fragment();
+    execute_fragment();
 }
 
 )";
-    out += program.first;
     return {out, program.second};
 }
+
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fcc20d3b4..fad346b48 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,170 +4,15 @@
 
 #pragma once
 
-#include <array>
-#include <string>
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/shader/shader_ir.h"
 
 namespace OpenGL::GLShader {
 
-constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
-using ProgramCode = std::vector<u64>;
-
-enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
-
-class ConstBufferEntry {
-    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-public:
-    void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
-        is_used = true;
-        this->index = static_cast<unsigned>(index);
-        this->stage = stage;
-        max_offset = std::max(max_offset, static_cast<unsigned>(offset));
-    }
-
-    void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
-        is_used = true;
-        is_indirect = true;
-        this->index = static_cast<unsigned>(index);
-        this->stage = stage;
-    }
-
-    bool IsUsed() const {
-        return is_used;
-    }
-
-    bool IsIndirect() const {
-        return is_indirect;
-    }
-
-    unsigned GetIndex() const {
-        return index;
-    }
-
-    unsigned GetSize() const {
-        return max_offset + 1;
-    }
-
-    std::string GetName() const {
-        return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
-    }
-
-    u32 GetHash() const {
-        return (static_cast<u32>(stage) << 16) | index;
-    }
-
-private:
-    static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
-        "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
-    };
-
-    bool is_used{};
-    bool is_indirect{};
-    unsigned index{};
-    unsigned max_offset{};
-    Maxwell::ShaderStage stage;
-};
-
-class SamplerEntry {
-    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-public:
-    SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
-                 Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
-        : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array),
-          is_shadow(is_shadow) {}
-
-    std::size_t GetOffset() const {
-        return offset;
-    }
-
-    std::size_t GetIndex() const {
-        return sampler_index;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
-    std::string GetName() const {
-        return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
-               std::to_string(sampler_index);
-    }
-
-    std::string GetTypeString() const {
-        using Tegra::Shader::TextureType;
-        std::string glsl_type;
-
-        switch (type) {
-        case TextureType::Texture1D:
-            glsl_type = "sampler1D";
-            break;
-        case TextureType::Texture2D:
-            glsl_type = "sampler2D";
-            break;
-        case TextureType::Texture3D:
-            glsl_type = "sampler3D";
-            break;
-        case TextureType::TextureCube:
-            glsl_type = "samplerCube";
-            break;
-        default:
-            UNIMPLEMENTED();
-        }
-        if (is_array)
-            glsl_type += "Array";
-        if (is_shadow)
-            glsl_type += "Shadow";
-        return glsl_type;
-    }
-
-    Tegra::Shader::TextureType GetType() const {
-        return type;
-    }
-
-    bool IsArray() const {
-        return is_array;
-    }
-
-    bool IsShadow() const {
-        return is_shadow;
-    }
-
-    u32 GetHash() const {
-        return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
-    }
-
-    static std::string GetArrayName(Maxwell::ShaderStage stage) {
-        return TextureSamplerNames[static_cast<std::size_t>(stage)];
-    }
-
-private:
-    static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
-        "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
-    };
-
-    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
-    /// instruction.
-    std::size_t offset;
-    Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used.
-    std::size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
-    Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
-    bool is_array;  ///< Whether the texture is being sampled as an array texture or not.
-    bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not.
-};
-
-struct ShaderEntries {
-    std::vector<ConstBufferEntry> const_buffer_entries;
-    std::vector<SamplerEntry> texture_samplers;
-    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
-    std::size_t shader_length;
-};
-
-using ProgramResult = std::pair<std::string, ShaderEntries>;
+using VideoCommon::Shader::ProgramCode;
 
 struct ShaderSetup {
     explicit ShaderSetup(ProgramCode program_code) {
@@ -178,12 +23,10 @@ struct ShaderSetup {
         ProgramCode code;
         ProgramCode code_b; // Used for dual vertex shaders
         u64 unique_identifier;
-        std::size_t real_size;
-        std::size_t real_size_b;
     } program;
 
     /// Used in scenarios where we have a dual vertex shaders
-    void SetProgramB(ProgramCode&& program_b) {
+    void SetProgramB(ProgramCode program_b) {
         program.code_b = std::move(program_b);
         has_program_b = true;
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 
 namespace OpenGL::GLShader {
 
-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
-    const auto& state = gpu.state;
+using Tegra::Engines::Maxwell3D;
+
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+    const auto& regs = maxwell.regs;
+    const auto& state = maxwell.state;
 
     // TODO(bunnei): Support more than one viewport
     viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
 
     u32 func = static_cast<u32>(regs.alpha_test_func);
     // Normalize the gl variants of opCompare to be the same as the normal variants
-    u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never);
+    const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
     if (func >= op_gl_variant_base) {
         func = func - op_gl_variant_base + 1U;
     }
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
 
     // Assign in which stage the position has to be flipped
     // (the last stage before the fragment shader).
-    if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
-        flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+    constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+    if (maxwell.regs.shader_config[geometry_index].enable) {
+        flip_stage = geometry_index;
     } else {
         flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
 
 namespace OpenGL::GLShader {
 
-using Tegra::Engines::Maxwell3D;
-
 /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-//       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-//       Not following that rule will cause problems on some AMD drivers.
+/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+///       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+///       Not following that rule will cause problems on some AMD drivers.
 struct MaxwellUniformData {
-    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
+    void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
+
     alignas(16) GLvec4 viewport_flip;
     struct alignas(16) {
         GLuint instance_id;
@@ -63,7 +62,6 @@ public:
         UpdatePipeline();
         state.draw.shader_program = 0;
         state.draw.program_pipeline = pipeline.handle;
-        state.geometry_shaders.enabled = (gs != 0);
     }
 
 private:
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 285594f50..03b7548c2 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
  * @returns Handle of the newly created OpenGL program object
  */
 template <typename... T>
-GLuint LoadProgram(bool separable_program, T... shaders) {
+GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
     // Link the program
     LOG_DEBUG(Render_OpenGL, "Linking program...");
 
@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
     if (separable_program) {
         glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
     }
+    if (hint_retrievable) {
+        glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
+    }
 
     glLinkProgram(program_id);
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index b7ba59350..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,14 +10,62 @@
 
 namespace OpenGL {
 
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
 OpenGLState OpenGLState::cur_state;
 bool OpenGLState::s_rgb_used;
+
+namespace {
+
+template <typename T>
+bool UpdateValue(T& current_value, const T new_value) {
+    const bool changed = current_value != new_value;
+    current_value = new_value;
+    return changed;
+}
+
+template <typename T1, typename T2>
+bool UpdateTie(T1 current_value, const T2 new_value) {
+    const bool changed = current_value != new_value;
+    current_value = new_value;
+    return changed;
+}
+
+void Enable(GLenum cap, bool enable) {
+    if (enable) {
+        glEnable(cap);
+    } else {
+        glDisable(cap);
+    }
+}
+
+void Enable(GLenum cap, GLuint index, bool enable) {
+    if (enable) {
+        glEnablei(cap, index);
+    } else {
+        glDisablei(cap, index);
+    }
+}
+
+void Enable(GLenum cap, bool& current_value, bool new_value) {
+    if (UpdateValue(current_value, new_value))
+        Enable(cap, new_value);
+}
+
+void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
+    if (UpdateValue(current_value, new_value))
+        Enable(cap, index, new_value);
+}
+
+} // namespace
+
 OpenGLState::OpenGLState() {
     // These all match default OpenGL values
-    geometry_shaders.enabled = false;
     framebuffer_srgb.enabled = false;
+
     multisample_control.alpha_to_coverage = false;
     multisample_control.alpha_to_one = false;
+
     cull.enabled = false;
     cull.mode = GL_BACK;
     cull.front_face = GL_CCW;
@@ -28,14 +76,15 @@ OpenGLState::OpenGLState() {
 
     primitive_restart.enabled = false;
     primitive_restart.index = 0;
+
     for (auto& item : color_mask) {
         item.red_enabled = GL_TRUE;
         item.green_enabled = GL_TRUE;
         item.blue_enabled = GL_TRUE;
         item.alpha_enabled = GL_TRUE;
     }
-    stencil.test_enabled = false;
-    auto reset_stencil = [](auto& config) {
+
+    const auto ResetStencil = [](auto& config) {
         config.test_func = GL_ALWAYS;
         config.test_ref = 0;
         config.test_mask = 0xFFFFFFFF;
@@ -44,8 +93,10 @@ OpenGLState::OpenGLState() {
         config.action_depth_pass = GL_KEEP;
         config.action_stencil_fail = GL_KEEP;
     };
-    reset_stencil(stencil.front);
-    reset_stencil(stencil.back);
+    stencil.test_enabled = false;
+    ResetStencil(stencil.front);
+    ResetStencil(stencil.back);
+
     for (auto& item : viewports) {
         item.x = 0;
         item.y = 0;
@@ -59,6 +110,7 @@ OpenGLState::OpenGLState() {
         item.scissor.width = 0;
         item.scissor.height = 0;
     }
+
     for (auto& item : blend) {
         item.enabled = true;
         item.rgb_equation = GL_FUNC_ADD;
@@ -68,11 +120,14 @@ OpenGLState::OpenGLState() {
         item.src_a_func = GL_ONE;
         item.dst_a_func = GL_ZERO;
     }
+
     independant_blend.enabled = false;
+
     blend_color.red = 0.0f;
     blend_color.green = 0.0f;
     blend_color.blue = 0.0f;
     blend_color.alpha = 0.0f;
+
     logic_op.enabled = false;
     logic_op.operation = GL_COPY;
 
@@ -89,9 +144,12 @@ OpenGLState::OpenGLState() {
     clip_distance = {};
 
     point.size = 1;
+
     fragment_color_clamp.enabled = false;
+
     depth_clamp.far_plane = false;
     depth_clamp.near_plane = false;
+
     polygon_offset.fill_enable = false;
     polygon_offset.line_enable = false;
     polygon_offset.point_enable = false;
@@ -101,279 +159,255 @@ OpenGLState::OpenGLState() {
 }
 
 void OpenGLState::ApplyDefaultState() {
+    glEnable(GL_BLEND);
     glDisable(GL_FRAMEBUFFER_SRGB);
     glDisable(GL_CULL_FACE);
     glDisable(GL_DEPTH_TEST);
     glDisable(GL_PRIMITIVE_RESTART);
     glDisable(GL_STENCIL_TEST);
-    glEnable(GL_BLEND);
     glDisable(GL_COLOR_LOGIC_OP);
     glDisable(GL_SCISSOR_TEST);
 }
 
+void OpenGLState::ApplyFramebufferState() const {
+    if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+    }
+    if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
+    }
+}
+
+void OpenGLState::ApplyVertexArrayState() const {
+    if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
+        glBindVertexArray(draw.vertex_array);
+    }
+}
+
+void OpenGLState::ApplyShaderProgram() const {
+    if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
+        glUseProgram(draw.shader_program);
+    }
+}
+
+void OpenGLState::ApplyProgramPipeline() const {
+    if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
+        glBindProgramPipeline(draw.program_pipeline);
+    }
+}
+
+void OpenGLState::ApplyClipDistances() const {
+    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
+        Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
+               clip_distance[i]);
+    }
+}
+
+void OpenGLState::ApplyPointSize() const {
+    if (UpdateValue(cur_state.point.size, point.size)) {
+        glPointSize(point.size);
+    }
+}
+
+void OpenGLState::ApplyFragmentColorClamp() const {
+    if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
+        glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
+                     fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
+    }
+}
+
+void OpenGLState::ApplyMultisample() const {
+    Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
+           multisample_control.alpha_to_coverage);
+    Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
+           multisample_control.alpha_to_one);
+}
+
+void OpenGLState::ApplyDepthClamp() const {
+    if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
+        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
+        return;
+    }
+    cur_state.depth_clamp = depth_clamp;
+
+    UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+                         "Unimplemented Depth Clamp Separation!");
+
+    Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
+}
+
 void OpenGLState::ApplySRgb() const {
-    // sRGB
-    if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
-        if (framebuffer_srgb.enabled) {
-            // Track if sRGB is used
-            s_rgb_used = true;
-            glEnable(GL_FRAMEBUFFER_SRGB);
-        } else {
-            glDisable(GL_FRAMEBUFFER_SRGB);
-        }
+    if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
+        return;
+    cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
+    if (framebuffer_srgb.enabled) {
+        // Track if sRGB is used
+        s_rgb_used = true;
+        glEnable(GL_FRAMEBUFFER_SRGB);
+    } else {
+        glDisable(GL_FRAMEBUFFER_SRGB);
     }
 }
 
 void OpenGLState::ApplyCulling() const {
-    // Culling
-    const bool cull_changed = cull.enabled != cur_state.cull.enabled;
-    if (cull_changed) {
-        if (cull.enabled) {
-            glEnable(GL_CULL_FACE);
-        } else {
-            glDisable(GL_CULL_FACE);
-        }
+    Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
+
+    if (UpdateValue(cur_state.cull.mode, cull.mode)) {
+        glCullFace(cull.mode);
     }
-    if (cull.enabled) {
-        if (cull_changed || cull.mode != cur_state.cull.mode) {
-            glCullFace(cull.mode);
-        }
 
-        if (cull_changed || cull.front_face != cur_state.cull.front_face) {
-            glFrontFace(cull.front_face);
-        }
+    if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
+        glFrontFace(cull.front_face);
     }
 }
 
 void OpenGLState::ApplyColorMask() const {
-    if (independant_blend.enabled) {
-        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            const auto& updated = color_mask[i];
-            const auto& current = cur_state.color_mask[i];
-            if (updated.red_enabled != current.red_enabled ||
-                updated.green_enabled != current.green_enabled ||
-                updated.blue_enabled != current.blue_enabled ||
-                updated.alpha_enabled != current.alpha_enabled) {
-                glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
-                             updated.blue_enabled, updated.alpha_enabled);
-            }
-        }
-    } else {
-        const auto& updated = color_mask[0];
-        const auto& current = cur_state.color_mask[0];
+    for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+        const auto& updated = color_mask[i];
+        auto& current = cur_state.color_mask[i];
         if (updated.red_enabled != current.red_enabled ||
             updated.green_enabled != current.green_enabled ||
             updated.blue_enabled != current.blue_enabled ||
             updated.alpha_enabled != current.alpha_enabled) {
-            glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
-                        updated.alpha_enabled);
+            current = updated;
+            glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+                         updated.blue_enabled, updated.alpha_enabled);
         }
     }
 }
 
 void OpenGLState::ApplyDepth() const {
-    // Depth test
-    const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
-    if (depth_test_changed) {
-        if (depth.test_enabled) {
-            glEnable(GL_DEPTH_TEST);
-        } else {
-            glDisable(GL_DEPTH_TEST);
-        }
-    }
-    if (depth.test_enabled &&
-        (depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
+    Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
+
+    if (cur_state.depth.test_func != depth.test_func) {
+        cur_state.depth.test_func = depth.test_func;
         glDepthFunc(depth.test_func);
     }
-    // Depth mask
-    if (depth.write_mask != cur_state.depth.write_mask) {
+
+    if (cur_state.depth.write_mask != depth.write_mask) {
+        cur_state.depth.write_mask = depth.write_mask;
         glDepthMask(depth.write_mask);
     }
 }
 
 void OpenGLState::ApplyPrimitiveRestart() const {
-    const bool primitive_restart_changed =
-        primitive_restart.enabled != cur_state.primitive_restart.enabled;
-    if (primitive_restart_changed) {
-        if (primitive_restart.enabled) {
-            glEnable(GL_PRIMITIVE_RESTART);
-        } else {
-            glDisable(GL_PRIMITIVE_RESTART);
-        }
-    }
-    if (primitive_restart_changed ||
-        (primitive_restart.enabled &&
-         primitive_restart.index != cur_state.primitive_restart.index)) {
+    Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
+
+    if (cur_state.primitive_restart.index != primitive_restart.index) {
+        cur_state.primitive_restart.index = primitive_restart.index;
         glPrimitiveRestartIndex(primitive_restart.index);
     }
 }
 
 void OpenGLState::ApplyStencilTest() const {
-    const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled;
-    if (stencil_test_changed) {
-        if (stencil.test_enabled) {
-            glEnable(GL_STENCIL_TEST);
-        } else {
-            glDisable(GL_STENCIL_TEST);
+    Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
+
+    const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
+        if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
+            current.test_mask != config.test_mask) {
+            current.test_func = config.test_func;
+            current.test_ref = config.test_ref;
+            current.test_mask = config.test_mask;
+            glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
         }
-    }
-    if (stencil.test_enabled) {
-        auto config_stencil = [stencil_test_changed](GLenum face, const auto& config,
-                                                     const auto& prev_config) {
-            if (stencil_test_changed || config.test_func != prev_config.test_func ||
-                config.test_ref != prev_config.test_ref ||
-                config.test_mask != prev_config.test_mask) {
-                glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
-            }
-            if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail ||
-                config.action_depth_pass != prev_config.action_depth_pass ||
-                config.action_stencil_fail != prev_config.action_stencil_fail) {
-                glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
-                                    config.action_depth_pass);
-            }
-            if (config.write_mask != prev_config.write_mask) {
-                glStencilMaskSeparate(face, config.write_mask);
-            }
-        };
-        config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
-        config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
-    }
-}
-// Viewport does not affects glClearBuffer so emulate viewport using scissor test
-void OpenGLState::EmulateViewportWithScissor() {
-    auto& current = viewports[0];
-    if (current.scissor.enabled) {
-        const GLint left = std::max(current.x, current.scissor.x);
-        const GLint right =
-            std::max(current.x + current.width, current.scissor.x + current.scissor.width);
-        const GLint bottom = std::max(current.y, current.scissor.y);
-        const GLint top =
-            std::max(current.y + current.height, current.scissor.y + current.scissor.height);
-        current.scissor.x = std::max(left, 0);
-        current.scissor.y = std::max(bottom, 0);
-        current.scissor.width = std::max(right - left, 0);
-        current.scissor.height = std::max(top - bottom, 0);
-    } else {
-        current.scissor.enabled = true;
-        current.scissor.x = current.x;
-        current.scissor.y = current.y;
-        current.scissor.width = current.width;
-        current.scissor.height = current.height;
-    }
+        if (current.action_depth_fail != config.action_depth_fail ||
+            current.action_depth_pass != config.action_depth_pass ||
+            current.action_stencil_fail != config.action_stencil_fail) {
+            current.action_depth_fail = config.action_depth_fail;
+            current.action_depth_pass = config.action_depth_pass;
+            current.action_stencil_fail = config.action_stencil_fail;
+            glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
+                                config.action_depth_pass);
+        }
+        if (current.write_mask != config.write_mask) {
+            current.write_mask = config.write_mask;
+            glStencilMaskSeparate(face, config.write_mask);
+        }
+    };
+    ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
+    ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
 }
 
 void OpenGLState::ApplyViewport() const {
-    if (geometry_shaders.enabled) {
-        for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
-             i++) {
-            const auto& current = cur_state.viewports[i];
-            const auto& updated = viewports[i];
-            if (updated.x != current.x || updated.y != current.y ||
-                updated.width != current.width || updated.height != current.height) {
-                glViewportIndexedf(
-                    i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
-                    static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
-            }
-            if (updated.depth_range_near != current.depth_range_near ||
-                updated.depth_range_far != current.depth_range_far) {
-                glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
-            }
-            const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
-            if (scissor_changed) {
-                if (updated.scissor.enabled) {
-                    glEnablei(GL_SCISSOR_TEST, i);
-                } else {
-                    glDisablei(GL_SCISSOR_TEST, i);
-                }
-            }
-            if (updated.scissor.enabled &&
-                (scissor_changed || updated.scissor.x != current.scissor.x ||
-                 updated.scissor.y != current.scissor.y ||
-                 updated.scissor.width != current.scissor.width ||
-                 updated.scissor.height != current.scissor.height)) {
-                glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
-                                 updated.scissor.height);
-            }
-        }
-    } else {
-        const auto& current = cur_state.viewports[0];
-        const auto& updated = viewports[0];
-        if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
-            updated.height != current.height) {
-            glViewport(updated.x, updated.y, updated.width, updated.height);
-        }
-        if (updated.depth_range_near != current.depth_range_near ||
-            updated.depth_range_far != current.depth_range_far) {
-            glDepthRange(updated.depth_range_near, updated.depth_range_far);
+    for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
+        const auto& updated = viewports[i];
+        auto& current = cur_state.viewports[i];
+
+        if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
+            current.height != updated.height) {
+            current.x = updated.x;
+            current.y = updated.y;
+            current.width = updated.width;
+            current.height = updated.height;
+            glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
+                               static_cast<GLfloat>(updated.width),
+                               static_cast<GLfloat>(updated.height));
         }
-        const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
-        if (scissor_changed) {
-            if (updated.scissor.enabled) {
-                glEnable(GL_SCISSOR_TEST);
-            } else {
-                glDisable(GL_SCISSOR_TEST);
-            }
+        if (current.depth_range_near != updated.depth_range_near ||
+            current.depth_range_far != updated.depth_range_far) {
+            current.depth_range_near = updated.depth_range_near;
+            current.depth_range_far = updated.depth_range_far;
+            glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
         }
-        if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x ||
-                                        updated.scissor.y != current.scissor.y ||
-                                        updated.scissor.width != current.scissor.width ||
-                                        updated.scissor.height != current.scissor.height)) {
-            glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
-                      updated.scissor.height);
+
+        Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
+
+        if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
+            current.scissor.width != updated.scissor.width ||
+            current.scissor.height != updated.scissor.height) {
+            current.scissor.x = updated.scissor.x;
+            current.scissor.y = updated.scissor.y;
+            current.scissor.width = updated.scissor.width;
+            current.scissor.height = updated.scissor.height;
+            glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
+                             updated.scissor.height);
         }
     }
 }
 
 void OpenGLState::ApplyGlobalBlending() const {
-    const Blend& current = cur_state.blend[0];
     const Blend& updated = blend[0];
-    const bool blend_changed = updated.enabled != current.enabled;
-    if (blend_changed) {
-        if (updated.enabled) {
-            glEnable(GL_BLEND);
-        } else {
-            glDisable(GL_BLEND);
-        }
-    }
-    if (!updated.enabled) {
-        return;
-    }
-    if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
-        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
-        updated.dst_a_func != current.dst_a_func) {
+    Blend& current = cur_state.blend[0];
+
+    Enable(GL_BLEND, current.enabled, updated.enabled);
+
+    if (current.src_rgb_func != updated.src_rgb_func ||
+        current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
+        current.dst_a_func != updated.dst_a_func) {
+        current.src_rgb_func = updated.src_rgb_func;
+        current.dst_rgb_func = updated.dst_rgb_func;
+        current.src_a_func = updated.src_a_func;
+        current.dst_a_func = updated.dst_a_func;
         glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
                             updated.dst_a_func);
     }
 
-    if (blend_changed || updated.rgb_equation != current.rgb_equation ||
-        updated.a_equation != current.a_equation) {
+    if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
+        current.rgb_equation = updated.rgb_equation;
+        current.a_equation = updated.a_equation;
         glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
     }
 }
 
 void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
     const Blend& updated = blend[target];
-    const Blend& current = cur_state.blend[target];
-    const bool blend_changed = updated.enabled != current.enabled || force;
-    if (blend_changed) {
-        if (updated.enabled) {
-            glEnablei(GL_BLEND, static_cast<GLuint>(target));
-        } else {
-            glDisablei(GL_BLEND, static_cast<GLuint>(target));
-        }
-    }
-    if (!updated.enabled) {
-        return;
+    Blend& current = cur_state.blend[target];
+
+    if (current.enabled != updated.enabled || force) {
+        current.enabled = updated.enabled;
+        Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
     }
-    if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
-        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
-        updated.dst_a_func != current.dst_a_func) {
+
+    if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
+                           current.dst_a_func),
+                  std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+                           updated.dst_a_func))) {
         glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
                              updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
     }
 
-    if (blend_changed || updated.rgb_equation != current.rgb_equation ||
-        updated.a_equation != current.a_equation) {
+    if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
+                  std::tie(updated.rgb_equation, updated.a_equation))) {
         glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
                                  updated.a_equation);
     }
@@ -381,202 +415,109 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
 
 void OpenGLState::ApplyBlending() const {
     if (independant_blend.enabled) {
-        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            ApplyTargetBlending(i,
-                                independant_blend.enabled != cur_state.independant_blend.enabled);
+        const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
+        for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
+            ApplyTargetBlending(target, force);
         }
     } else {
         ApplyGlobalBlending();
     }
-    if (blend_color.red != cur_state.blend_color.red ||
-        blend_color.green != cur_state.blend_color.green ||
-        blend_color.blue != cur_state.blend_color.blue ||
-        blend_color.alpha != cur_state.blend_color.alpha) {
+    cur_state.independant_blend.enabled = independant_blend.enabled;
+
+    if (UpdateTie(
+            std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
+                     cur_state.blend_color.blue, cur_state.blend_color.alpha),
+            std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
         glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
     }
 }
 
 void OpenGLState::ApplyLogicOp() const {
-    const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
-    if (logic_op_changed) {
-        if (logic_op.enabled) {
-            glEnable(GL_COLOR_LOGIC_OP);
-        } else {
-            glDisable(GL_COLOR_LOGIC_OP);
-        }
-    }
+    Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
 
-    if (logic_op.enabled &&
-        (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
+    if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
         glLogicOp(logic_op.operation);
     }
 }
 
 void OpenGLState::ApplyPolygonOffset() const {
-
-    const bool fill_enable_changed =
-        polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
-    const bool line_enable_changed =
-        polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
-    const bool point_enable_changed =
-        polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
-    const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
-    const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
-    const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
-
-    if (fill_enable_changed) {
-        if (polygon_offset.fill_enable) {
-            glEnable(GL_POLYGON_OFFSET_FILL);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_FILL);
-        }
-    }
-
-    if (line_enable_changed) {
-        if (polygon_offset.line_enable) {
-            glEnable(GL_POLYGON_OFFSET_LINE);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_LINE);
-        }
-    }
-
-    if (point_enable_changed) {
-        if (polygon_offset.point_enable) {
-            glEnable(GL_POLYGON_OFFSET_POINT);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_POINT);
-        }
-    }
-
-    if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
-        (factor_changed || units_changed || clamp_changed)) {
-
+    Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
+           polygon_offset.fill_enable);
+    Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
+           polygon_offset.line_enable);
+    Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
+           polygon_offset.point_enable);
+
+    if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
+                           cur_state.polygon_offset.clamp),
+                  std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
         if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
             glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
         } else {
-            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
             UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
                                  "Unimplemented Depth polygon offset clamp.");
+            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
         }
     }
 }
 
 void OpenGLState::ApplyTextures() const {
+    bool has_delta{};
+    std::size_t first{};
+    std::size_t last{};
+    std::array<GLuint, Maxwell::NumTextureSamplers> textures;
+
     for (std::size_t i = 0; i < std::size(texture_units); ++i) {
         const auto& texture_unit = texture_units[i];
-        const auto& cur_state_texture_unit = cur_state.texture_units[i];
-
-        if (texture_unit.texture != cur_state_texture_unit.texture) {
-            glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
-            glBindTexture(texture_unit.target, texture_unit.texture);
-        }
-        // Update the texture swizzle
-        if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r ||
-            texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
-            texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
-            texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
-            std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
-                                         texture_unit.swizzle.b, texture_unit.swizzle.a};
-            glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+        auto& cur_state_texture_unit = cur_state.texture_units[i];
+        textures[i] = texture_unit.texture;
+        if (cur_state_texture_unit.texture == textures[i])
+            continue;
+        cur_state_texture_unit.texture = textures[i];
+        if (!has_delta) {
+            first = i;
+            has_delta = true;
         }
+        last = i;
+    }
+    if (has_delta) {
+        glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+                       textures.data() + first);
     }
 }
 
 void OpenGLState::ApplySamplers() const {
     bool has_delta{};
-    std::size_t first{}, last{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+    std::size_t first{};
+    std::size_t last{};
+    std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
+
     for (std::size_t i = 0; i < std::size(samplers); ++i) {
+        if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
+            continue;
+        cur_state.texture_units[i].sampler = texture_units[i].sampler;
         samplers[i] = texture_units[i].sampler;
-        if (samplers[i] != cur_state.texture_units[i].sampler) {
-            if (!has_delta) {
-                first = i;
-                has_delta = true;
-            }
-            last = i;
+        if (!has_delta) {
+            first = i;
+            has_delta = true;
         }
+        last = i;
     }
     if (has_delta) {
         glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       samplers.data());
-    }
-}
-
-void OpenGLState::ApplyFramebufferState() const {
-    if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
-        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
-    }
-    if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
-    }
-}
-
-void OpenGLState::ApplyVertexArrayState() const {
-    if (draw.vertex_array != cur_state.draw.vertex_array) {
-        glBindVertexArray(draw.vertex_array);
-    }
-}
-
-void OpenGLState::ApplyDepthClamp() const {
-    if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
-        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
-        return;
-    }
-    if (depth_clamp.far_plane != depth_clamp.near_plane) {
-        UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!");
-    }
-    if (depth_clamp.far_plane || depth_clamp.near_plane) {
-        glEnable(GL_DEPTH_CLAMP);
-    } else {
-        glDisable(GL_DEPTH_CLAMP);
+                       samplers.data() + first);
     }
 }
 
 void OpenGLState::Apply() const {
     ApplyFramebufferState();
     ApplyVertexArrayState();
-
-    // Shader program
-    if (draw.shader_program != cur_state.draw.shader_program) {
-        glUseProgram(draw.shader_program);
-    }
-
-    // Program pipeline
-    if (draw.program_pipeline != cur_state.draw.program_pipeline) {
-        glBindProgramPipeline(draw.program_pipeline);
-    }
-    // Clip distance
-    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
-        if (clip_distance[i] != cur_state.clip_distance[i]) {
-            if (clip_distance[i]) {
-                glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
-            } else {
-                glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
-            }
-        }
-    }
-    // Point
-    if (point.size != cur_state.point.size) {
-        glPointSize(point.size);
-    }
-    if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
-        glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
-                     fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
-    }
-    if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
-        if (multisample_control.alpha_to_coverage) {
-            glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
-        } else {
-            glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
-        }
-    }
-    if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
-        if (multisample_control.alpha_to_one) {
-            glEnable(GL_SAMPLE_ALPHA_TO_ONE);
-        } else {
-            glDisable(GL_SAMPLE_ALPHA_TO_ONE);
-        }
-    }
+    ApplyShaderProgram();
+    ApplyProgramPipeline();
+    ApplyClipDistances();
+    ApplyPointSize();
+    ApplyFragmentColorClamp();
+    ApplyMultisample();
     ApplyDepthClamp();
     ApplyColorMask();
     ApplyViewport();
@@ -590,7 +531,28 @@ void OpenGLState::Apply() const {
     ApplyTextures();
     ApplySamplers();
     ApplyPolygonOffset();
-    cur_state = *this;
+}
+
+void OpenGLState::EmulateViewportWithScissor() {
+    auto& current = viewports[0];
+    if (current.scissor.enabled) {
+        const GLint left = std::max(current.x, current.scissor.x);
+        const GLint right =
+            std::max(current.x + current.width, current.scissor.x + current.scissor.width);
+        const GLint bottom = std::max(current.y, current.scissor.y);
+        const GLint top =
+            std::max(current.y + current.height, current.scissor.y + current.scissor.height);
+        current.scissor.x = std::max(left, 0);
+        current.scissor.y = std::max(bottom, 0);
+        current.scissor.width = std::max(right - left, 0);
+        current.scissor.height = std::max(top - bottom, 0);
+    } else {
+        current.scissor.enabled = true;
+        current.scissor.x = current.x;
+        current.scissor.y = current.y;
+        current.scissor.width = current.width;
+        current.scissor.height = current.height;
+    }
 }
 
 OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index a5a7c0920..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
     } depth_clamp; // GL_DEPTH_CLAMP
 
     struct {
-        bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
-    } geometry_shaders;
-
-    struct {
         bool enabled;      // GL_CULL_FACE
         GLenum mode;       // GL_CULL_FACE_MODE
         GLenum front_face; // GL_FRONT_FACE
@@ -126,26 +122,14 @@ public:
     struct TextureUnit {
         GLuint texture; // GL_TEXTURE_BINDING_2D
         GLuint sampler; // GL_SAMPLER_BINDING
-        GLenum target;
-        struct {
-            GLint r; // GL_TEXTURE_SWIZZLE_R
-            GLint g; // GL_TEXTURE_SWIZZLE_G
-            GLint b; // GL_TEXTURE_SWIZZLE_B
-            GLint a; // GL_TEXTURE_SWIZZLE_A
-        } swizzle;
 
         void Unbind() {
             texture = 0;
-            swizzle.r = GL_RED;
-            swizzle.g = GL_GREEN;
-            swizzle.b = GL_BLUE;
-            swizzle.a = GL_ALPHA;
         }
 
         void Reset() {
             Unbind();
             sampler = 0;
-            target = GL_TEXTURE_2D;
         }
     };
     std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
@@ -196,34 +180,26 @@ public:
     static OpenGLState GetCurState() {
         return cur_state;
     }
+
     static bool GetsRGBUsed() {
         return s_rgb_used;
     }
+
     static void ClearsRGBUsed() {
         s_rgb_used = false;
     }
+
     /// Apply this state as the current OpenGL state
     void Apply() const;
-    /// Apply only the state affecting the framebuffer
+
     void ApplyFramebufferState() const;
-    /// Apply only the state affecting the vertex array
     void ApplyVertexArrayState() const;
-    /// Set the initial OpenGL state
-    static void ApplyDefaultState();
-    /// Resets any references to the given resource
-    OpenGLState& UnbindTexture(GLuint handle);
-    OpenGLState& ResetSampler(GLuint handle);
-    OpenGLState& ResetProgram(GLuint handle);
-    OpenGLState& ResetPipeline(GLuint handle);
-    OpenGLState& ResetVertexArray(GLuint handle);
-    OpenGLState& ResetFramebuffer(GLuint handle);
-    void EmulateViewportWithScissor();
-
-private:
-    static OpenGLState cur_state;
-    // Workaround for sRGB problems caused by
-    // QT not supporting srgb output
-    static bool s_rgb_used;
+    void ApplyShaderProgram() const;
+    void ApplyProgramPipeline() const;
+    void ApplyClipDistances() const;
+    void ApplyPointSize() const;
+    void ApplyFragmentColorClamp() const;
+    void ApplyMultisample() const;
     void ApplySRgb() const;
     void ApplyCulling() const;
     void ApplyColorMask() const;
@@ -239,6 +215,26 @@ private:
     void ApplySamplers() const;
     void ApplyDepthClamp() const;
     void ApplyPolygonOffset() const;
+
+    /// Set the initial OpenGL state
+    static void ApplyDefaultState();
+
+    /// Resets any references to the given resource
+    OpenGLState& UnbindTexture(GLuint handle);
+    OpenGLState& ResetSampler(GLuint handle);
+    OpenGLState& ResetProgram(GLuint handle);
+    OpenGLState& ResetPipeline(GLuint handle);
+    OpenGLState& ResetVertexArray(GLuint handle);
+    OpenGLState& ResetFramebuffer(GLuint handle);
+
+    /// Viewport does not affect glClearBuffer, so emulate the viewport using the scissor test
+    void EmulateViewportWithScissor();
+
+private:
+    static OpenGLState cur_state;
+
+    // Workaround for sRGB problems caused by Qt not supporting sRGB output
+    static bool s_rgb_used;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c268c9686..d69cba9c3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
 #include <algorithm>
 #include <cstddef>
 #include <cstdlib>
-#include <cstring>
 #include <memory>
 #include <glad/glad.h>
 #include "common/assert.h"
@@ -14,6 +13,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/frontend/emu_window.h"
+#include "core/frontend/scope_acquire_window_context.h"
 #include "core/memory.h"
 #include "core/perf_stats.h"
 #include "core/settings.h"
@@ -97,29 +97,16 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
     return matrix;
 }
 
-ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_)
-    : emu_window{emu_window_} {
-    if (Settings::values.use_multi_core) {
-        emu_window.MakeCurrent();
-    }
-}
-ScopeAcquireGLContext::~ScopeAcquireGLContext() {
-    if (Settings::values.use_multi_core) {
-        emu_window.DoneCurrent();
-    }
-}
-
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
-    : VideoCore::RendererBase{window} {}
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
+    : VideoCore::RendererBase{window}, system{system} {}
 
 RendererOpenGL::~RendererOpenGL() = default;
 
 /// Swap buffers (render frame)
 void RendererOpenGL::SwapBuffers(
     std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-    ScopeAcquireGLContext acquire_context{render_window};
 
-    Core::System::GetInstance().GetPerfStats().EndSystemFrame();
+    system.GetPerfStats().EndSystemFrame();
 
     // Maintain the rasterizer's state as a priority
     OpenGLState prev_state = OpenGLState::GetCurState();
@@ -149,8 +136,8 @@ void RendererOpenGL::SwapBuffers(
 
     render_window.PollEvents();
 
-    Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
-    Core::System::GetInstance().GetPerfStats().BeginSystemFrame();
+    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
+    system.GetPerfStats().BeginSystemFrame();
 
     // Restore the rasterizer state
     prev_state.Apply();
@@ -176,17 +163,14 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
         // Reset the screen info's display texture to its own permanent texture
         screen_info.display_texture = screen_info.texture.resource.handle;
 
-        Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
-                                             Memory::FlushMode::Flush);
+        rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
 
-        VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
-                                       Memory::GetPointer(framebuffer_addr),
-                                       gl_framebuffer_data.data(), true);
+        constexpr u32 linear_bpp = 4;
+        VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
+                                       framebuffer.width, framebuffer.height, bytes_per_pixel,
+                                       linear_bpp, Memory::GetPointer(framebuffer_addr),
+                                       gl_framebuffer_data.data());
 
-        state.texture_units[0].texture = screen_info.texture.resource.handle;
-        state.Apply();
-
-        glActiveTexture(GL_TEXTURE0);
         glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
 
         // Update existing texture
@@ -194,14 +178,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
         //       they differ from the LCD resolution.
         // TODO: Applications could theoretically crash yuzu here by specifying too large
         //       framebuffer sizes. We should make sure that this cannot happen.
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
-                        screen_info.texture.gl_format, screen_info.texture.gl_type,
-                        gl_framebuffer_data.data());
+        glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
+                            framebuffer.height, screen_info.texture.gl_format,
+                            screen_info.texture.gl_type, gl_framebuffer_data.data());
 
         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-
-        state.texture_units[0].texture = 0;
-        state.Apply();
     }
 }
 
@@ -211,17 +192,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
  */
 void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                 const TextureInfo& texture) {
-    state.texture_units[0].texture = texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
-
-    // Update existing texture
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
-
-    state.texture_units[0].texture = 0;
-    state.Apply();
+    const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
+    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
 }
 
 /**
@@ -261,55 +233,57 @@ void RendererOpenGL::InitOpenGLObjects() {
                               sizeof(ScreenRectVertex));
 
     // Allocate textures for the screen
-    screen_info.texture.resource.Create();
-
-    // Allocation of storage is deferred until the first frame, when we
-    // know the framebuffer size.
+    screen_info.texture.resource.Create(GL_TEXTURE_2D);
 
-    state.texture_units[0].texture = screen_info.texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    const GLuint texture = screen_info.texture.resource.handle;
+    glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
 
     screen_info.display_texture = screen_info.texture.resource.handle;
 
-    state.texture_units[0].texture = 0;
-    state.Apply();
-
     // Clear screen to black
     LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
 }
 
+void RendererOpenGL::AddTelemetryFields() { // Logs the GL identity strings and records them as telemetry
+    const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
+    const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+    const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
+
+    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
+    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
+    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
+
+    auto& telemetry_session = system.TelemetrySession(); // session comes from the owning Core::System
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+} // NOTE(review): the strings are used unchecked - confirm glGetString cannot return null here
+
 void RendererOpenGL::CreateRasterizer() {
     if (rasterizer) {
         return;
     }
     // Initialize sRGB Usage
     OpenGLState::ClearsRGBUsed();
-    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info);
+    rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
 }
 
 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                  const Tegra::FramebufferConfig& framebuffer) {
-
     texture.width = framebuffer.width;
     texture.height = framebuffer.height;
+    texture.pixel_format = framebuffer.pixel_format;
 
     GLint internal_format;
     switch (framebuffer.pixel_format) {
     case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-        internal_format = GL_RGBA;
+        internal_format = GL_RGBA8;
         texture.gl_format = GL_RGBA;
         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
         gl_framebuffer_data.resize(texture.width * texture.height * 4);
         break;
     default:
-        internal_format = GL_RGBA;
+        internal_format = GL_RGBA8;
         texture.gl_format = GL_RGBA;
         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
         gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -318,15 +292,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
         UNREACHABLE();
     }
 
-    state.texture_units[0].texture = texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
-                 texture.gl_format, texture.gl_type, nullptr);
-
-    state.texture_units[0].texture = 0;
-    state.Apply();
+    texture.resource.Release();
+    texture.resource.Create(GL_TEXTURE_2D);
+    glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
 }
 
 void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -368,7 +336,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
     }};
 
     state.texture_units[0].texture = screen_info.display_texture;
-    state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
     // Workaround brigthness problems in SMO by enabling sRGB in the final output
     // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
     state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
@@ -429,7 +396,8 @@ void RendererOpenGL::CaptureScreenshot() {
     GLuint renderbuffer;
     glGenRenderbuffers(1, &renderbuffer);
     glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
-    glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height);
+    glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
+                          layout.height);
     glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
 
     DrawScreen(layout);
@@ -506,24 +474,14 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
 
 /// Initialize the renderer
 bool RendererOpenGL::Init() {
-    ScopeAcquireGLContext acquire_context{render_window};
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
 
     if (GLAD_GL_KHR_debug) {
         glEnable(GL_DEBUG_OUTPUT);
         glDebugMessageCallback(DebugHandler, nullptr);
     }
 
-    const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
-    const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
-    const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
-
-    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
-    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
-    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
-
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+    AddTelemetryFields();
 
     if (!GLAD_GL_VERSION_4_3) {
         return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b85cc262f..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,6 +12,10 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
 
+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -35,23 +39,13 @@ struct TextureInfo {
 /// Structure used for storing information about the display target for the Switch screen
 struct ScreenInfo {
     GLuint display_texture;
-    const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
+    const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
     TextureInfo texture;
 };
 
-/// Helper class to acquire/release OpenGL context within a given scope
-class ScopeAcquireGLContext : NonCopyable {
-public:
-    explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window);
-    ~ScopeAcquireGLContext();
-
-private:
-    Core::Frontend::EmuWindow& emu_window;
-};
-
 class RendererOpenGL : public VideoCore::RendererBase {
 public:
-    explicit RendererOpenGL(Core::Frontend::EmuWindow& window);
+    explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
     ~RendererOpenGL() override;
 
     /// Swap buffers (render frame)
@@ -66,6 +60,7 @@ public:
 
 private:
     void InitOpenGLObjects();
+    void AddTelemetryFields();
     void CreateRasterizer();
 
     void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -82,6 +77,8 @@ private:
     void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                     const TextureInfo& texture);
 
+    Core::System& system;
+
     OpenGLState state;
 
     // OpenGL object IDs
@@ -106,7 +103,7 @@ private:
 
     /// Used for transforming the framebuffer orientation
     Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
-    MathUtil::Rectangle<int> framebuffer_crop_rect;
+    Common::Rectangle<int> framebuffer_crop_rect;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index d84634cb3..84a987371 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,39 @@
 #include <string>
 #include <fmt/format.h>
 #include <glad/glad.h>
+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/utils.h"
 
 namespace OpenGL {
 
+BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
+
+BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
+
+void BindBuffersRangePushBuffer::Setup(GLuint first_) { // Begins a new batch
+    first = first_; // later passed as the `first` argument of glBindBuffersRange in Bind()
+    buffers.clear(); // discard whatever was queued before this call
+    offsets.clear();
+    sizes.clear();
+}
+
+void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
+    buffers.push_back(buffer); // one entry is appended to each vector per call,
+    offsets.push_back(offset); // so index i of all three describes the same range;
+    sizes.push_back(size);     // Bind() asserts the three sizes agree
+}
+
+void BindBuffersRangePushBuffer::Bind() const { // Submits every queued range in one GL call
+    const std::size_t count{buffers.size()};
+    DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); // vectors must stay parallel
+    if (count == 0) { // nothing queued: skip the GL call
+        return;
+    }
+    glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
+                       sizes.data()); // count is narrowed to GLsizei for the GL signature
+}
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
     if (!GLAD_GL_KHR_debug) {
         return; // We don't need to throw an error as this is just for debugging
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 1fcb6fc11..aef45c9dc 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
 #pragma once
 
 #include <string>
+#include <vector>
 #include <glad/glad.h>
 #include "common/common_types.h"
 
 namespace OpenGL {
 
+class BindBuffersRangePushBuffer { // Queues (buffer, offset, size) ranges for one glBindBuffersRange
+public:
+    explicit BindBuffersRangePushBuffer(GLenum target); ///< target is forwarded to glBindBuffersRange
+    ~BindBuffersRangePushBuffer();
+
+    void Setup(GLuint first_); ///< Starts a new batch: stores first_ and clears the queued ranges
+
+    void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); ///< Queues one buffer range
+
+    void Bind() const; ///< Calls glBindBuffersRange with the queued ranges; does nothing when empty
+
+private:
+    GLenum target;
+    GLuint first{}; ///< Zero until Setup() runs, so Bind() never reads an indeterminate value
+    std::vector<GLuint> buffers;
+    std::vector<GLintptr> offsets;
+    std::vector<GLsizeiptr> sizes;
+};
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
 
 } // namespace OpenGL
 \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
new file mode 100644
index 000000000..ba25b5bc7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -0,0 +1,45 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vulkan/vulkan.hpp>
+
+namespace Vulkan {
+
+// vulkan.hpp's own Unique* handles use DispatchLoaderStatic; these aliases use DispatchLoaderDynamic
+template <typename T>
+using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
+
+using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
+using UniqueBuffer = UniqueHandle<vk::Buffer>;
+using UniqueBufferView = UniqueHandle<vk::BufferView>;
+using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
+using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
+using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
+using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
+using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
+using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
+using UniqueDevice = UniqueHandle<vk::Device>;
+using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
+using UniqueEvent = UniqueHandle<vk::Event>;
+using UniqueFence = UniqueHandle<vk::Fence>;
+using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
+using UniqueImage = UniqueHandle<vk::Image>;
+using UniqueImageView = UniqueHandle<vk::ImageView>;
+using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
+using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
+using UniquePipeline = UniqueHandle<vk::Pipeline>;
+using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
+using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
+using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
+using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
+using UniqueSampler = UniqueHandle<vk::Sampler>;
+using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
+using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
+using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
+using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
+using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+
+namespace Vulkan::MaxwellToVK {
+
+namespace Sampler {
+
+vk::Filter Filter(Tegra::Texture::TextureFilter filter) { // Tegra texture filter -> vk::Filter
+    switch (filter) {
+    case Tegra::Texture::TextureFilter::Linear:
+        return vk::Filter::eLinear;
+    case Tegra::Texture::TextureFilter::Nearest:
+        return vk::Filter::eNearest;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+    return {}; // reached only for values no case handles; yields a value-initialized vk::Filter
+}
+
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
+    switch (mipmap_filter) { // Tegra mipmap filter -> vk::SamplerMipmapMode
+    case Tegra::Texture::TextureMipmapFilter::None:
+        // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
+        // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
+        // use an image view with a single mipmap level to emulate this.
+        return vk::SamplerMipmapMode::eLinear; // placeholder: None currently behaves like Linear
+    case Tegra::Texture::TextureMipmapFilter::Linear:
+        return vk::SamplerMipmapMode::eLinear;
+    case Tegra::Texture::TextureMipmapFilter::Nearest:
+        return vk::SamplerMipmapMode::eNearest;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+    return {}; // reached only for values no case handles
+}
+
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { // Tegra wrap mode -> Vulkan
+    switch (wrap_mode) {
+    case Tegra::Texture::WrapMode::Wrap:
+        return vk::SamplerAddressMode::eRepeat;
+    case Tegra::Texture::WrapMode::Mirror:
+        return vk::SamplerAddressMode::eMirroredRepeat;
+    case Tegra::Texture::WrapMode::ClampToEdge:
+        return vk::SamplerAddressMode::eClampToEdge;
+    case Tegra::Texture::WrapMode::Border:
+        return vk::SamplerAddressMode::eClampToBorder;
+    case Tegra::Texture::WrapMode::ClampOGL:
+        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+        // eClampToBorder to get the border color of the texture, and then sample the edge to
+        // manually mix them. However the shader part of this is not yet implemented.
+        return vk::SamplerAddressMode::eClampToBorder; // approximation until the shader side exists
+    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    case Tegra::Texture::WrapMode::MirrorOnceBorder:
+        UNIMPLEMENTED(); // reported as unimplemented; the mode returned below is a stand-in
+        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+    return {}; // reached only for values no case handles
+}
+
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
+    switch (depth_compare_func) { // one-to-one mapping onto vk::CompareOp
+    case Tegra::Texture::DepthCompareFunc::Never:
+        return vk::CompareOp::eNever;
+    case Tegra::Texture::DepthCompareFunc::Less:
+        return vk::CompareOp::eLess;
+    case Tegra::Texture::DepthCompareFunc::LessEqual:
+        return vk::CompareOp::eLessOrEqual;
+    case Tegra::Texture::DepthCompareFunc::Equal:
+        return vk::CompareOp::eEqual;
+    case Tegra::Texture::DepthCompareFunc::NotEqual:
+        return vk::CompareOp::eNotEqual;
+    case Tegra::Texture::DepthCompareFunc::Greater:
+        return vk::CompareOp::eGreater;
+    case Tegra::Texture::DepthCompareFunc::GreaterEqual:
+        return vk::CompareOp::eGreaterOrEqual;
+    case Tegra::Texture::DepthCompareFunc::Always:
+        return vk::CompareOp::eAlways;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
+                      static_cast<u32>(depth_compare_func));
+    return {}; // reached only for values no case handles
+}
+
+} // namespace Sampler
+
+struct FormatTuple {
+    vk::Format format;            ///< Vulkan format
+    ComponentType component_type; ///< Abstracted component type
+    bool attachable;              ///< True when this format can be used as an attachment
+};
+
+static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+    {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true},    // ABGR8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false},     // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // A1B5G5R5U
+    {vk::Format::eR8Unorm, ComponentType::UNorm, true},                // R8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R8UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R11FG11FB10F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false},     // DXT1
+    {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false},         // DXT23
+    {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false},         // DXT45
+    {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false},         // DXN1
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2UNORM
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2SNORM
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC7U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_UF16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_SF16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_4X4
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BGRA8
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16I
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16I
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGB32F
+    {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true},     // RGBA8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X8
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X5
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_5X4
+
+    // sRGB formats and the remaining ASTC formats (BGRA8_SRGB, ASTC_2D_5X5 and ASTC_2D_10X8 included)
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
+
+    // Depth formats
+    {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
+    {vk::Format::eD16Unorm, ComponentType::UNorm, true},  // Z16
+
+    // DepthStencil formats
+    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
+    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
+    {vk::Format::eUndefined, ComponentType::Invalid, false},   // Z32FS8
+}};
+
+static constexpr bool IsZetaFormat(PixelFormat pixel_format) { // used to pick the attachment kind
+    return pixel_format >= PixelFormat::MaxColorFormat && // half-open range:
+           pixel_format < PixelFormat::MaxDepthStencilFormat; // [MaxColorFormat, MaxDepthStencilFormat)
+}
+
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+                                          PixelFormat pixel_format, ComponentType component_type) {
+    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); // table bounds check
+
+    const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; // table is indexed by format
+    UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
+                         "Unimplemented texture format with pixel format={} and component type={}",
+                         static_cast<u32>(pixel_format), static_cast<u32>(component_type));
+    ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
+
+    auto usage = vk::FormatFeatureFlagBits::eSampledImage | // features requested for every format
+                 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
+    if (tuple.attachable) { // attachable formats also request the matching attachment feature
+        usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+                                            : vk::FormatFeatureFlagBits::eColorAttachment;
+    }
+    return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+} // returns {format chosen by the device for this usage, whether the table marks it attachable}
+
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { // Maxwell stage -> Vulkan stage bit
+    switch (stage) {
+    case Maxwell::ShaderStage::Vertex:
+        return vk::ShaderStageFlagBits::eVertex;
+    case Maxwell::ShaderStage::TesselationControl:
+        return vk::ShaderStageFlagBits::eTessellationControl;
+    case Maxwell::ShaderStage::TesselationEval:
+        return vk::ShaderStageFlagBits::eTessellationEvaluation;
+    case Maxwell::ShaderStage::Geometry:
+        return vk::ShaderStageFlagBits::eGeometry;
+    case Maxwell::ShaderStage::Fragment:
+        return vk::ShaderStageFlagBits::eFragment;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
+    return {}; // reached only for values no case handles
+}
+
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+    switch (topology) { // only the topologies listed here are translated
+    case Maxwell::PrimitiveTopology::Points:
+        return vk::PrimitiveTopology::ePointList;
+    case Maxwell::PrimitiveTopology::Lines:
+        return vk::PrimitiveTopology::eLineList;
+    case Maxwell::PrimitiveTopology::LineStrip:
+        return vk::PrimitiveTopology::eLineStrip;
+    case Maxwell::PrimitiveTopology::Triangles:
+        return vk::PrimitiveTopology::eTriangleList;
+    case Maxwell::PrimitiveTopology::TriangleStrip:
+        return vk::PrimitiveTopology::eTriangleStrip;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+    return {}; // any other topology is reported above and yields a value-initialized result
+}
+
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
+    switch (type) { // unsupported (type, size) pairs break out to the report at the bottom
+    case Maxwell::VertexAttribute::Type::SignedNorm:
+        break;
+    case Maxwell::VertexAttribute::Type::UnsignedNorm:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Unorm;
+        default:
+            break;
+        }
+        break;
+    case Maxwell::VertexAttribute::Type::UnsignedInt:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Uint;
+        default:
+            break;
+        }
+        break; // explicit break so this case does not fall through into the cases below
+    case Maxwell::VertexAttribute::Type::SignedInt:
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+    case Maxwell::VertexAttribute::Type::SignedScaled:
+        break; // no formats translated yet for these types
+    case Maxwell::VertexAttribute::Type::Float:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+            return vk::Format::eR32G32B32A32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return vk::Format::eR32G32B32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32:
+            return vk::Format::eR32G32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Sfloat;
+        default:
+            break;
+        }
+        break;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
+                      static_cast<u32>(size));
+    return {}; // value-initialized vk::Format for every pair not translated above
+}
+
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { // Maxwell comparison -> vk::CompareOp
+    switch (comparison) { // each operation has two enumerators (plain and *Old) mapping to one result
+    case Maxwell::ComparisonOp::Never:
+    case Maxwell::ComparisonOp::NeverOld:
+        return vk::CompareOp::eNever;
+    case Maxwell::ComparisonOp::Less:
+    case Maxwell::ComparisonOp::LessOld:
+        return vk::CompareOp::eLess;
+    case Maxwell::ComparisonOp::Equal:
+    case Maxwell::ComparisonOp::EqualOld:
+        return vk::CompareOp::eEqual;
+    case Maxwell::ComparisonOp::LessEqual:
+    case Maxwell::ComparisonOp::LessEqualOld:
+        return vk::CompareOp::eLessOrEqual;
+    case Maxwell::ComparisonOp::Greater:
+    case Maxwell::ComparisonOp::GreaterOld:
+        return vk::CompareOp::eGreater;
+    case Maxwell::ComparisonOp::NotEqual:
+    case Maxwell::ComparisonOp::NotEqualOld:
+        return vk::CompareOp::eNotEqual;
+    case Maxwell::ComparisonOp::GreaterEqual:
+    case Maxwell::ComparisonOp::GreaterEqualOld:
+        return vk::CompareOp::eGreaterOrEqual;
+    case Maxwell::ComparisonOp::Always:
+    case Maxwell::ComparisonOp::AlwaysOld:
+        return vk::CompareOp::eAlways;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+    return {}; // reached only for values no case handles
+}
+
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) { // Maxwell index format -> vk::IndexType
+    switch (index_format) {
+    case Maxwell::IndexFormat::UnsignedByte:
+        UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
+        return vk::IndexType::eUint16; // stand-in after the report; no index conversion happens here
+    case Maxwell::IndexFormat::UnsignedShort:
+        return vk::IndexType::eUint16;
+    case Maxwell::IndexFormat::UnsignedInt:
+        return vk::IndexType::eUint32;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
+    return {}; // reached only for values no case handles
+}
+
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { // Maxwell stencil op -> vk::StencilOp
+    switch (stencil_op) { // each operation has two enumerators (plain and *OGL) mapping to one result
+    case Maxwell::StencilOp::Keep:
+    case Maxwell::StencilOp::KeepOGL:
+        return vk::StencilOp::eKeep;
+    case Maxwell::StencilOp::Zero:
+    case Maxwell::StencilOp::ZeroOGL:
+        return vk::StencilOp::eZero;
+    case Maxwell::StencilOp::Replace:
+    case Maxwell::StencilOp::ReplaceOGL:
+        return vk::StencilOp::eReplace;
+    case Maxwell::StencilOp::Incr:
+    case Maxwell::StencilOp::IncrOGL:
+        return vk::StencilOp::eIncrementAndClamp;
+    case Maxwell::StencilOp::Decr:
+    case Maxwell::StencilOp::DecrOGL:
+        return vk::StencilOp::eDecrementAndClamp;
+    case Maxwell::StencilOp::Invert:
+    case Maxwell::StencilOp::InvertOGL:
+        return vk::StencilOp::eInvert;
+    case Maxwell::StencilOp::IncrWrap:
+    case Maxwell::StencilOp::IncrWrapOGL:
+        return vk::StencilOp::eIncrementAndWrap;
+    case Maxwell::StencilOp::DecrWrap:
+    case Maxwell::StencilOp::DecrWrapOGL:
+        return vk::StencilOp::eDecrementAndWrap;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
+    return {}; // reached only for values no case handles
+}
+
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { // Maxwell blend equation -> vk::BlendOp
+    switch (equation) { // each equation has two enumerators (plain and *GL) mapping to one result
+    case Maxwell::Blend::Equation::Add:
+    case Maxwell::Blend::Equation::AddGL:
+        return vk::BlendOp::eAdd;
+    case Maxwell::Blend::Equation::Subtract:
+    case Maxwell::Blend::Equation::SubtractGL:
+        return vk::BlendOp::eSubtract;
+    case Maxwell::Blend::Equation::ReverseSubtract:
+    case Maxwell::Blend::Equation::ReverseSubtractGL:
+        return vk::BlendOp::eReverseSubtract;
+    case Maxwell::Blend::Equation::Min:
+    case Maxwell::Blend::Equation::MinGL:
+        return vk::BlendOp::eMin;
+    case Maxwell::Blend::Equation::Max:
+    case Maxwell::Blend::Equation::MaxGL:
+        return vk::BlendOp::eMax;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+    return {}; // reached only for values no case handles
+}
+
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
+    // Every factor exists as a plain and a *GL enumerator; both select the same Vulkan factor.
+    using Factor = Maxwell::Blend::Factor;
+    if (factor == Factor::Zero || factor == Factor::ZeroGL) {
+        return vk::BlendFactor::eZero;
+    }
+    if (factor == Factor::One || factor == Factor::OneGL) {
+        return vk::BlendFactor::eOne;
+    }
+    if (factor == Factor::SourceColor || factor == Factor::SourceColorGL) {
+        return vk::BlendFactor::eSrcColor;
+    }
+    if (factor == Factor::OneMinusSourceColor || factor == Factor::OneMinusSourceColorGL) {
+        return vk::BlendFactor::eOneMinusSrcColor;
+    }
+    if (factor == Factor::SourceAlpha || factor == Factor::SourceAlphaGL) {
+        return vk::BlendFactor::eSrcAlpha;
+    }
+    if (factor == Factor::OneMinusSourceAlpha || factor == Factor::OneMinusSourceAlphaGL) {
+        return vk::BlendFactor::eOneMinusSrcAlpha;
+    }
+    if (factor == Factor::DestAlpha || factor == Factor::DestAlphaGL) {
+        return vk::BlendFactor::eDstAlpha;
+    }
+    if (factor == Factor::OneMinusDestAlpha || factor == Factor::OneMinusDestAlphaGL) {
+        return vk::BlendFactor::eOneMinusDstAlpha;
+    }
+    if (factor == Factor::DestColor || factor == Factor::DestColorGL) {
+        return vk::BlendFactor::eDstColor;
+    }
+    if (factor == Factor::OneMinusDestColor || factor == Factor::OneMinusDestColorGL) {
+        return vk::BlendFactor::eOneMinusDstColor;
+    }
+    if (factor == Factor::SourceAlphaSaturate || factor == Factor::SourceAlphaSaturateGL) {
+        return vk::BlendFactor::eSrcAlphaSaturate;
+    }
+    if (factor == Factor::Source1Color || factor == Factor::Source1ColorGL) {
+        return vk::BlendFactor::eSrc1Color;
+    }
+    if (factor == Factor::OneMinusSource1Color || factor == Factor::OneMinusSource1ColorGL) {
+        return vk::BlendFactor::eOneMinusSrc1Color;
+    }
+    if (factor == Factor::Source1Alpha || factor == Factor::Source1AlphaGL) {
+        return vk::BlendFactor::eSrc1Alpha;
+    }
+    if (factor == Factor::OneMinusSource1Alpha || factor == Factor::OneMinusSource1AlphaGL) {
+        return vk::BlendFactor::eOneMinusSrc1Alpha;
+    }
+    if (factor == Factor::ConstantColor || factor == Factor::ConstantColorGL) {
+        return vk::BlendFactor::eConstantColor;
+    }
+    if (factor == Factor::OneMinusConstantColor || factor == Factor::OneMinusConstantColorGL) {
+        return vk::BlendFactor::eOneMinusConstantColor;
+    }
+    if (factor == Factor::ConstantAlpha || factor == Factor::ConstantAlphaGL) {
+        return vk::BlendFactor::eConstantAlpha;
+    }
+    if (factor == Factor::OneMinusConstantAlpha || factor == Factor::OneMinusConstantAlphaGL) {
+        return vk::BlendFactor::eOneMinusConstantAlpha;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+    return {};
+}
+
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
+    if (front_face == Maxwell::Cull::FrontFace::ClockWise) {
+        return vk::FrontFace::eClockwise;
+    }
+    if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) {
+        return vk::FrontFace::eCounterClockwise;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
+    return {}; // Unrecognised winding order: fall back to a value-initialized result
+}
+
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
+    // Guard-style lookup: each recognised guest value returns its Vulkan flag directly.
+    using Face = Maxwell::Cull::CullFace;
+    if (cull_face == Face::Front)
+        return vk::CullModeFlagBits::eFront;
+    if (cull_face == Face::Back)
+        return vk::CullModeFlagBits::eBack;
+    if (cull_face == Face::FrontAndBack)
+        return vk::CullModeFlagBits::eFrontAndBack;
+    UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+    return {};
+}
+
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
+    // Channel and zero selectors translate one to one; the integer and the float
+    // "one" selectors both become eOne.
+    using Source = Tegra::Texture::SwizzleSource;
+    if (swizzle == Source::Zero)
+        return vk::ComponentSwizzle::eZero;
+    if (swizzle == Source::R)
+        return vk::ComponentSwizzle::eR;
+    if (swizzle == Source::G)
+        return vk::ComponentSwizzle::eG;
+    if (swizzle == Source::B)
+        return vk::ComponentSwizzle::eB;
+    if (swizzle == Source::A)
+        return vk::ComponentSwizzle::eA;
+    if (swizzle == Source::OneInt || swizzle == Source::OneFloat)
+        return vk::ComponentSwizzle::eOne;
+    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
+    return {};
+}
+
+} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan::MaxwellToVK {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using ComponentType = VideoCore::Surface::ComponentType;
+
+namespace Sampler {
+/// Maps a guest texture filter to a Vulkan filter.
+vk::Filter Filter(Tegra::Texture::TextureFilter filter);
+/// Maps a guest mipmap filter to a Vulkan sampler mipmap mode.
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
+/// Maps a guest texture wrap mode to a Vulkan sampler address mode.
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
+/// Maps a guest depth compare function to a Vulkan compare operation.
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
+
+} // namespace Sampler
+/// Resolves the Vulkan format for a guest surface format. TODO: document the returned bool.
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+                                          PixelFormat pixel_format, ComponentType component_type);
+/// Maps a guest shader stage to a Vulkan shader stage flag.
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
+/// Maps a guest primitive topology to a Vulkan primitive topology.
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
+/// Maps a guest vertex attribute type and size pair to a Vulkan format.
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
+/// Maps a guest comparison operation to a Vulkan compare operation.
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
+/// Maps a guest index format to a Vulkan index type.
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
+/// Maps a guest stencil operation to a Vulkan stencil operation.
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
+/// Maps a guest blend equation to a Vulkan blend op; unhandled values yield a default value.
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
+/// Maps a guest blend factor to a Vulkan blend factor; unhandled values yield a default value.
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
+/// Maps a guest front face winding to a Vulkan front face; unhandled values yield a default.
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
+/// Maps a guest cull face to Vulkan cull mode flags; unhandled values yield empty flags.
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
+/// Maps a guest swizzle source to a Vulkan component swizzle; unhandled values yield a default.
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
+
+} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..02a9f5ecb
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,123 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <tuple>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "core/memory.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
+                                     std::size_t alignment, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {} // Everything is stored by the initializer list; no body needed
+
+VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
+    : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
+    using Usage = vk::BufferUsageFlagBits;
+    using Access = vk::AccessFlagBits;
+    const auto buffer_usage = Usage::eVertexBuffer | Usage::eIndexBuffer | Usage::eUniformBuffer;
+    const auto buffer_access =
+        Access::eVertexAttributeRead | Access::eIndexRead | Access::eUniformRead;
+    stream_buffer = std::make_unique<VKStreamBuffer>(
+        device, memory_manager, scheduler, size, buffer_usage, buffer_access,
+        vk::PipelineStageFlagBits::eAllCommands);
+    buffer_handle = stream_buffer->GetBuffer(); // Cached for the GetBuffer() accessor
+}
+
+VKBufferCache::~VKBufferCache() = default; // Defined here, where VKStreamBuffer is complete
+
+u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
+    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");
+    const auto host_ptr = Memory::GetPointer(*cpu_addr);
+
+    // Cache bookkeeping is costly, so only uploads of at least this many bytes are tracked.
+    // TODO: Figure out which size is the best for given games.
+    constexpr std::size_t min_cached_size = 2048;
+    const bool use_cache = cache && size >= min_cached_size;
+
+    if (use_cache) {
+        if (auto cached = TryGet(host_ptr); cached) {
+            if (cached->GetSize() >= size && cached->GetAlignment() == alignment) {
+                return cached->GetOffset();
+            }
+            // Too small or differently aligned: drop the stale entry and upload again.
+            Unregister(cached);
+        }
+    }
+
+    // The aligned offset is claimed even when there is no host memory to copy from.
+    AlignBuffer(alignment);
+    const u64 uploaded_offset = buffer_offset;
+    if (host_ptr == nullptr) {
+        return uploaded_offset;
+    }
+
+    std::memcpy(buffer_ptr, host_ptr, size);
+    buffer_ptr += size;
+    buffer_offset += size;
+
+    if (use_cache) {
+        auto fresh = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
+                                                         alignment, host_ptr);
+        Register(fresh);
+    }
+    return uploaded_offset;
+}
+
+u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
+    // Align first so the write cursor and the reported offset both refer to the copied data.
+    AlignBuffer(alignment);
+    const u64 start_offset = buffer_offset;
+    std::memcpy(buffer_ptr, raw_pointer, size);
+    buffer_offset = start_offset + size;
+    buffer_ptr += size;
+    return start_offset;
+}
+
+std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
+    // Nothing is copied here; the aligned cursor is handed out and then moved past the region.
+    AlignBuffer(alignment);
+    const std::tuple<u8*, u64> reservation{buffer_ptr, buffer_offset};
+
+    buffer_ptr += size;
+    buffer_offset += size;
+    return reservation;
+}
+
+void VKBufferCache::Reserve(std::size_t max_size) {
+    // Whenever the stream buffer asks for it, every cached entry is dropped.
+    const auto [mapped_ptr, base_offset, must_invalidate] = stream_buffer->Reserve(max_size);
+    buffer_ptr = mapped_ptr;
+    buffer_offset = buffer_offset_base = base_offset;
+    if (must_invalidate) {
+        InvalidateAll();
+    }
+}
+
+VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
+    return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); // Size since Reserve
+}
+
+void VKBufferCache::AlignBuffer(std::size_t alignment) {
+    // The buffer offset is what gets aligned; the mapped pointer just skips the same padding.
+    const u64 previous_offset = buffer_offset;
+    buffer_offset = Common::AlignUp(previous_offset, alignment);
+    buffer_ptr += buffer_offset - previous_offset;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..08b786aad
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,103 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <tuple>
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKMemoryManager;
+class VKStreamBuffer;
+
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
+                               u8* host_ptr);
+    /// Returns the CPU address passed to the constructor.
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
+    }
+    /// RasterizerCacheObject override; returns the same value as GetSize().
+    std::size_t GetSizeInBytes() const override {
+        return size;
+    }
+    /// Returns the size passed to the constructor.
+    std::size_t GetSize() const {
+        return size;
+    }
+    /// Returns the offset passed to the constructor.
+    u64 GetOffset() const {
+        return offset;
+    }
+    /// Returns the alignment passed to the constructor.
+    std::size_t GetAlignment() const {
+        return alignment;
+    }
+
+    // We do not have to flush this cache as things in it are never modified by us.
+    void Flush() override {}
+
+private:
+    VAddr cpu_addr{};
+    std::size_t size{};
+    u64 offset{};
+    std::size_t alignment{};
+};
+
+class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+public:
+    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+    ~VKBufferCache();
+
+    /// Uploads data from a guest GPU address and returns the host buffer offset it was placed
+    /// at. An entry is only cached when cache is set and size is at least 2048 bytes.
+    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
+
+    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
+    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
+
+    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
+    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
+
+    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
+    void Reserve(std::size_t max_size);
+
+    /// Ensures that the set data is sent to the device.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
+
+    /// Returns the buffer cache handle.
+    vk::Buffer GetBuffer() const {
+        return buffer_handle;
+    }
+
+private:
+    void AlignBuffer(std::size_t alignment); ///< Advances the cursor to an aligned buffer offset
+
+    Tegra::MemoryManager& tegra_memory_manager; ///< Translates GPU addresses in UploadMemory
+
+    std::unique_ptr<VKStreamBuffer> stream_buffer; ///< Stream buffer created in the constructor
+    vk::Buffer buffer_handle; ///< Handle obtained from stream_buffer in the constructor
+
+    u8* buffer_ptr = nullptr; ///< Write cursor, reset by every Reserve() call
+    u64 buffer_offset = 0; ///< Buffer offset that corresponds to buffer_ptr
+    u64 buffer_offset_base = 0; ///< Offset at the last Reserve(); Send() passes the difference
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
new file mode 100644
index 000000000..00242ecbe
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -0,0 +1,238 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <optional>
+#include <set>
+#include <vector>
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+
+namespace Vulkan {
+
+namespace Alternatives {
+// Fallback candidates in order of preference; a value-initialized vk::Format ends each list.
+constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
+    vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
+constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
+    vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
+
+} // namespace Alternatives
+
+constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
+    // Only these two depth-stencil formats have fallback lists; anything else yields nullptr.
+    if (format == vk::Format::eD24UnormS8Uint) {
+        return Alternatives::Depth24UnormS8Uint.data();
+    }
+    if (format == vk::Format::eD16UnormS8Uint) {
+        return Alternatives::Depth16UnormS8Uint.data();
+    }
+    return nullptr;
+}
+
+constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
+                                                   FormatType format_type) {
+    if (format_type == FormatType::Linear) {
+        return properties.linearTilingFeatures;
+    }
+    if (format_type == FormatType::Optimal) {
+        return properties.optimalTilingFeatures;
+    }
+    if (format_type == FormatType::Buffer) {
+        return properties.bufferFeatures;
+    }
+    return {}; // Any other format type reports no features at all
+}
+
+VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+                   vk::SurfaceKHR surface)
+    : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
+    SetupFamilies(dldi, surface); // Picks graphics_family and present_family
+    SetupProperties(dldi); // Stores device_type and uniform_buffer_alignment
+}
+
+VKDevice::~VKDevice() = default; // Member destructors do all of the cleanup
+
+bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
+    const auto queue_cis = GetDeviceQueueCreateInfos();
+    vk::PhysicalDeviceFeatures device_features{}; // Value-initialized, nothing switched on here
+    // The swapchain extension is the only device extension requested.
+    const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
+    const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
+                                         0, nullptr, static_cast<u32>(extensions.size()),
+                                         extensions.data(), &device_features);
+    vk::Device dummy_logical; // Raw handle filled in by createDevice below
+    if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
+        LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
+        return false;
+    }
+    // dld is initialized first because the deleter of the wrapper below is constructed with it.
+    dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
+    logical = UniqueDevice(
+        dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
+
+    graphics_queue = logical->getQueue(graphics_family, 0, dld);
+    present_queue = logical->getQueue(present_family, 0, dld);
+    return true;
+}
+
+vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
+                                        vk::FormatFeatureFlags wanted_usage,
+                                        FormatType format_type) const {
+    // Fast path: the device handles the requested format as is.
+    if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
+        return wanted_format;
+    }
+
+    const vk::Format* const fallbacks = GetFormatAlternatives(wanted_format);
+    if (!fallbacks) {
+        LOG_CRITICAL(Render_Vulkan,
+                     "Format={} with usage={} and type={} has no defined alternatives and host "
+                     "hardware does not support it",
+                     static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+                     static_cast<u32>(format_type));
+        UNREACHABLE();
+        return wanted_format;
+    }
+
+    // Walk the sentinel-terminated list and take the first candidate the device accepts.
+    for (const vk::Format* candidate = fallbacks; *candidate != vk::Format{}; ++candidate) {
+        if (IsFormatSupported(*candidate, wanted_usage, format_type)) {
+            LOG_WARNING(Render_Vulkan,
+                        "Emulating format={} with alternative format={} with usage={} and type={}",
+                        static_cast<u32>(wanted_format), static_cast<u32>(*candidate),
+                        static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
+            return *candidate;
+        }
+    }
+
+    // Every fallback was rejected as well; report it and hand back the original format.
+    LOG_CRITICAL(Render_Vulkan,
+                 "Format={} with usage={} and type={} is not supported by the host hardware and "
+                 "doesn't support any of the alternatives",
+                 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+                 static_cast<u32>(format_type));
+    UNREACHABLE();
+    return wanted_format;
+}
+
+bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+                                 FormatType format_type) const {
+    const auto entry = format_properties.find(wanted_format);
+    if (entry != format_properties.end()) {
+        const vk::FormatFeatureFlags available = GetFormatFeatures(entry->second, format_type);
+        return (available & wanted_usage) == wanted_usage;
+    }
+    LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
+    UNREACHABLE();
+    return true; // Formats missing from the cached table are treated as supported
+}
+
+bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+                          vk::SurfaceKHR surface) {
+    // The device has to list the swapchain extension among its device extensions.
+    const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
+    bool swapchain_found = false;
+    for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+        if (extension.extensionName == swapchain_extension) {
+            swapchain_found = true;
+        }
+    }
+    if (!swapchain_found) {
+        return false;
+    }
+
+    // At least one non-empty queue family must do graphics and one must be able to present.
+    bool graphics_found = false;
+    bool present_found = false;
+    const auto families = physical.getQueueFamilyProperties(dldi);
+    for (u32 index = 0; index < static_cast<u32>(families.size()); ++index) {
+        if (families[index].queueCount == 0) {
+            continue;
+        }
+        if (families[index].queueFlags & vk::QueueFlagBits::eGraphics) {
+            graphics_found = true;
+        }
+        if (physical.getSurfaceSupportKHR(index, surface, dldi)) {
+            present_found = true;
+        }
+    }
+    if (!graphics_found || !present_found) {
+        return false;
+    }
+
+    // TODO(Rodrigo): Check if the device matches all requirements.
+    // For now the only limit checked is a maxUniformBufferRange of at least 65536.
+    const vk::PhysicalDeviceProperties properties = physical.getProperties(dldi);
+    return properties.limits.maxUniformBufferRange >= 65536;
+}
+
+void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
+    // Scan the non-empty queue families until both a graphics and a present family are known.
+    std::optional<u32> graphics_family_;
+    std::optional<u32> present_family_;
+    const auto families = physical.getQueueFamilyProperties(dldi);
+    const u32 family_count = static_cast<u32>(families.size());
+    for (u32 index = 0; index < family_count && !(graphics_family_ && present_family_); ++index) {
+        if (families[index].queueCount == 0) {
+            continue;
+        }
+        if (families[index].queueFlags & vk::QueueFlagBits::eGraphics) {
+            graphics_family_ = index;
+        }
+        if (physical.getSurfaceSupportKHR(index, surface, dldi)) {
+            present_family_ = index;
+        }
+    }
+    ASSERT(graphics_family_ && present_family_);
+
+    graphics_family = *graphics_family_;
+    present_family = *present_family_;
+}
+
+void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
+    const auto info = physical.getProperties(dldi);
+    uniform_buffer_alignment = static_cast<u64>(info.limits.minUniformBufferOffsetAlignment);
+    device_type = info.deviceType; // Consulted later by IsIntegrated()
+}
+
+std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
+    // Static storage: the returned structures keep a pointer to this priority value.
+    static const float QUEUE_PRIORITY = 1.f;
+    // A set yields a single entry when one family serves both graphics and presentation.
+    const std::set<u32> families{graphics_family, present_family};
+    std::vector<vk::DeviceQueueCreateInfo> create_infos;
+    for (const u32 family : families) {
+        create_infos.push_back(vk::DeviceQueueCreateInfo{{}, family, 1, &QUEUE_PRIORITY});
+    }
+    return create_infos;
+}
+
+std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
+    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
+    // Formats queried from the driver once, in this order, and kept for later lookups.
+    constexpr vk::Format queried_formats[] = {
+        vk::Format::eA8B8G8R8UnormPack32,
+        vk::Format::eB5G6R5UnormPack16,
+        vk::Format::eA2B10G10R10UnormPack32,
+        vk::Format::eR8G8B8A8Srgb,
+        vk::Format::eR8Unorm,
+        vk::Format::eD32Sfloat,
+        vk::Format::eD16Unorm,
+        vk::Format::eD16UnormS8Uint,
+        vk::Format::eD24UnormS8Uint,
+        vk::Format::eD32SfloatS8Uint,
+        vk::Format::eBc1RgbaUnormBlock,
+        vk::Format::eBc2UnormBlock,
+        vk::Format::eBc3UnormBlock,
+        vk::Format::eBc4UnormBlock,
+    };
+    std::map<vk::Format, vk::FormatProperties> format_properties;
+    for (const vk::Format format : queried_formats)
+        format_properties.emplace(format, physical.getFormatProperties(format, dldi));
+    return format_properties;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
new file mode 100644
index 000000000..e87c7a508
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -0,0 +1,116 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <map>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+/// Selects which vk::FormatProperties feature set (linear, optimal or buffer) is consulted
+enum class FormatType { Linear, Optimal, Buffer };
+
+/// Handles data specific to a physical device.
+class VKDevice final {
+public:
+    explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+                      vk::SurfaceKHR surface);
+    ~VKDevice();
+
+    /// Creates the logical device and fetches its queues. Returns true on success.
+    bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
+
+    /**
+     * Returns a format supported by the device for the passed requirements.
+     * @param wanted_format The ideal format to be returned. It may not be the returned format.
+     * @param wanted_usage The usage that must be fulfilled even if the format is not supported.
+     * @param format_type Format type usage.
+     * @returns A format supported by the device.
+     */
+    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+                                  FormatType format_type) const;
+
+    /// Returns the dispatch loader with direct function pointers of the device
+    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
+        return dld;
+    }
+
+    /// Returns the logical device
+    vk::Device GetLogical() const {
+        return logical.get();
+    }
+
+    /// Returns the physical device.
+    vk::PhysicalDevice GetPhysical() const {
+        return physical;
+    }
+
+    /// Returns the main graphics queue.
+    vk::Queue GetGraphicsQueue() const {
+        return graphics_queue;
+    }
+
+    /// Returns the main present queue.
+    vk::Queue GetPresentQueue() const {
+        return present_queue;
+    }
+
+    /// Returns main graphics queue family index.
+    u32 GetGraphicsFamily() const {
+        return graphics_family;
+    }
+
+    /// Returns main present queue family index.
+    u32 GetPresentFamily() const {
+        return present_family;
+    }
+
+    /// Returns true if the physical device type is an integrated GPU.
+    bool IsIntegrated() const {
+        return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
+    }
+
+    /// Returns the uniform buffer alignment requirement.
+    u64 GetUniformBufferAlignment() const {
+        return uniform_buffer_alignment;
+    }
+
+    /// Checks if the physical device is suitable.
+    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+                           vk::SurfaceKHR surface);
+
+private:
+    /// Sets up queue families.
+    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
+
+    /// Sets up device properties.
+    void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
+
+    /// Returns a list of queue initialization descriptors.
+    std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
+
+    /// Returns true if a format is supported.
+    bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+                           FormatType format_type) const;
+
+    /// Returns the device properties for Vulkan formats.
+    static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
+        const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
+
+    const vk::PhysicalDevice physical;  ///< Physical device
+    vk::DispatchLoaderDynamic dld;      ///< Device function pointers
+    UniqueDevice logical;               ///< Logical device
+    vk::Queue graphics_queue;           ///< Main graphics queue
+    vk::Queue present_queue;            ///< Main present queue
+    u32 graphics_family{};              ///< Main graphics queue family index
+    u32 present_family{};               ///< Main present queue family index
+    vk::PhysicalDeviceType device_type; ///< Physical device type
+    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requirement
+    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
new file mode 100644
index 000000000..0451babbf
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <tuple>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+// TODO(Rodrigo): Fine tune this number
+constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+
+class VKMemoryAllocation final {
+public:
+    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
+                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
+        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
+          shifted_type{ShiftType(type)}, is_mappable{properties &
+                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
+        if (is_mappable) {
+            const auto dev = device.GetLogical();
+            const auto& dld = device.GetDispatchLoader();
+            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
+        }
+    }
+
+    ~VKMemoryAllocation() {
+        const auto dev = device.GetLogical();
+        const auto& dld = device.GetDispatchLoader();
+        if (is_mappable)
+            dev.unmapMemory(memory, dld);
+        dev.free(memory, nullptr, dld);
+    }
+
+    /// Reserves a region of this allocation. Returns nullptr when there is no room for it.
+    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
+        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
+                                        static_cast<u64>(alignment));
+        if (!found) {
+            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
+                                       static_cast<u64>(alignment));
+            if (!found) {
+                // Signal out of memory, it'll try to do more allocations.
+                return nullptr;
+            }
+        }
+        u8* address = is_mappable ? base_address + *found : nullptr;
+        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
+                                                           *found + commit_size);
+        commits.push_back(commit.get());
+
+        // The end of the commit just made is very likely to be free, start the next search there.
+        free_iterator = *found + commit_size;
+
+        return commit;
+    }
+
+    /// Forgets a commit so that its region can be handed out again.
+    void Free(const VKMemoryCommitImpl* commit) {
+        ASSERT(commit);
+        const auto it = std::find(commits.begin(), commits.end(), commit);
+        if (it == commits.end()) {
+            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
+            UNREACHABLE();
+            return;
+        }
+        commits.erase(it);
+    }
+
+    /// Returns whether this allocation is compatible with the arguments.
+    bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
+        return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
+               (type_mask & shifted_type) != 0;
+    }
+
+private:
+    static constexpr u32 ShiftType(u32 type) {
+        return 1U << type;
+    }
+
+    /// Searches ["start", "end") for a free region of "size" bytes aligned to "alignment".
+    /// Returns the offset of the region found, or an empty optional when nothing fits.
+    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
+        // Every candidate is aligned before it is bounds checked, so a result never passes "end".
+        u64 try_left = Common::AlignUp(start, alignment);
+        while (try_left + size <= end) {
+            const u64 try_right = try_left + size;
+
+            bool overlap = false;
+            for (const auto& commit : commits) {
+                const auto [commit_left, commit_right] = commit->interval;
+                if (try_left < commit_right && commit_left < try_right) {
+                    // There's an overlap, continue the search where the overlapping commit ends.
+                    try_left = Common::AlignUp(commit_right, alignment);
+                    overlap = true;
+                    break;
+                }
+            }
+            if (!overlap) {
+                // A free address has been found.
+                return try_left;
+            }
+        }
+        // No free regions were found, return an empty optional.
+        return std::nullopt;
+    }
+
+    const VKDevice& device;                   ///< Vulkan device.
+    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handle.
+    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
+    const u64 alloc_size;                     ///< Size of this allocation.
+    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
+    const bool is_mappable;                   ///< Whether the allocation is mappable.
+
+    /// Base address of the mapped pointer.
+    u8* base_address{};
+
+    /// Hints where the next free region is likely going to be.
+    u64 free_iterator{};
+
+    /// Stores all commits done from this allocation.
+    std::vector<const VKMemoryCommitImpl*> commits;
+};
+
+VKMemoryManager::VKMemoryManager(const VKDevice& device)
+    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
+      is_memory_unified{GetMemoryUnified(props)} {}
+
+VKMemoryManager::~VKMemoryManager() = default;
+
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
+    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+
+    // A host visible commit looks for host visible and coherent memory, any other commit looks
+    // for a fast device local type.
+    const vk::MemoryPropertyFlags wanted_properties =
+        host_visible
+            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
+            : vk::MemoryPropertyFlagBits::eDeviceLocal;
+
+    const auto TryCommit = [&]() -> VKMemoryCommit {
+        for (auto& alloc : allocs) {
+            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
+                continue;
+
+            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
+                return commit;
+            }
+        }
+        return {};
+    };
+    // First try to serve the commit from the allocations that already exist.
+    if (auto commit = TryCommit(); commit) {
+        return commit;
+    }
+
+    // No existing allocation could hold the commit, allocate a new chunk of memory.
+    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
+        // TODO(Rodrigo): Try to use host memory.
+        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
+        UNREACHABLE();
+    }
+
+    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
+    // there's a bug.
+    auto commit = TryCommit();
+    ASSERT(commit);
+    return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
+    const auto logical = device.GetLogical();
+    const auto& loader = device.GetDispatchLoader();
+    // Query what the buffer needs, commit memory for it and bind both together.
+    auto commit = Commit(logical.getBufferMemoryRequirements(buffer, loader), host_visible);
+    logical.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), loader);
+    return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
+    const auto logical = device.GetLogical();
+    const auto& loader = device.GetDispatchLoader();
+    // Query what the image needs, commit memory for it and bind both together.
+    auto commit = Commit(logical.getImageMemoryRequirements(image, loader), host_visible);
+    logical.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), loader);
+    return commit;
+}
+
+bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
+                                  u64 size) {
+    const u32 type = [&]() {
+        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+            const auto matched = props.memoryTypes[type_index].propertyFlags & wanted_properties;
+            if ((type_mask & (1U << type_index)) && matched == wanted_properties) {
+                // The type is allowed by the mask and offers every one of the wanted properties.
+                return type_index;
+            }
+        }
+        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
+        UNREACHABLE();
+        return 0u;
+    }();
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+
+    // Ask the driver for a chunk of the memory type selected above.
+    const vk::MemoryAllocateInfo memory_ai(size, type);
+    vk::DeviceMemory memory;
+    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+        res != vk::Result::eSuccess) {
+        LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
+        return false;
+    }
+    allocs.push_back(
+        std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
+    return true;
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
+    for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
+        if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+            // Memory is only considered unified when every heap is device local.
+            return false;
+        }
+    }
+    return true;
+}
+
+VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
+                                       u8* data, u64 begin, u64 end)
+    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
+
+VKMemoryCommitImpl::~VKMemoryCommitImpl() {
+    allocation->Free(this);
+}
+
+u8* VKMemoryCommitImpl::GetData() const {
+    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
+    return data;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
new file mode 100644
index 000000000..073597b35
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKMemoryAllocation;
+class VKMemoryCommitImpl;
+
+using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
+
+class VKMemoryManager final {
+public:
+    explicit VKMemoryManager(const VKDevice& device);
+    ~VKMemoryManager();
+
+    /**
+     * Commits memory with the specified requirements.
+     * @param reqs Requirements returned from a Vulkan call.
+     * @param host_visible Signals the allocator that it *must* use host visible and coherent
+     * memory. When passing false, it will try to allocate device local memory.
+     * @returns A memory commit.
+     */
+    VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
+
+    /// Commits memory required by the buffer and binds it.
+    VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
+
+    /// Commits memory required by the image and binds it.
+    VKMemoryCommit Commit(vk::Image image, bool host_visible);
+
+    /// Returns true when every memory heap of the device is device local (unified memory model).
+    bool IsMemoryUnified() const {
+        return is_memory_unified;
+    }
+
+private:
+    /// Allocates a chunk of memory.
+    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
+
+    /// Returns true if the device uses a unified memory model.
+    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+
+    const VKDevice& device;                                  ///< Device handler.
+    const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties.
+    const bool is_memory_unified;                            ///< True if memory model is unified.
+    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+};
+
+class VKMemoryCommitImpl final {
+    friend VKMemoryAllocation;
+
+public:
+    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
+                                u64 begin, u64 end);
+    ~VKMemoryCommitImpl();
+
+    /// Returns the host pointer of the mapped commit. Asserts when the commit is not mapped.
+    u8* GetData() const;
+
+    /// Returns the Vulkan memory handle.
+    vk::DeviceMemory GetMemory() const {
+        return memory;
+    }
+
+    /// Returns the start position of the commit relative to the allocation.
+    vk::DeviceSize GetOffset() const {
+        return static_cast<vk::DeviceSize>(interval.first);
+    }
+
+private:
+    std::pair<u64, u64> interval{};   ///< Begin and end of the commit inside the allocation.
+    vk::DeviceMemory memory;          ///< Vulkan device memory handle.
+    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
+    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
new file mode 100644
index 000000000..13c46e5b8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -0,0 +1,285 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+// TODO(Rodrigo): Fine tune these numbers.
+constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+constexpr std::size_t FENCES_GROW_STEP = 0x40;
+
+class CommandBufferPool final : public VKFencedPool {
+public:
+    explicit CommandBufferPool(const VKDevice& device)
+        : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
+    /// Adds a command pool with COMMAND_BUFFER_POOL_SIZE buffers; the index range is not needed.
+    void Allocate(std::size_t begin, std::size_t end) override {
+        const auto dev = device.GetLogical();
+        const auto& dld = device.GetDispatchLoader();
+        const u32 graphics_family = device.GetGraphicsFamily();
+
+        auto pool = std::make_unique<Pool>();
+
+        // Command buffers are going to be committed, recorded and executed every single usage
+        // cycle. They are also going to be reset when committed.
+        const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
+                                vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
+        const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
+        pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
+
+        const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
+                                                      vk::CommandBufferLevel::ePrimary,
+                                                      static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
+        pool->cmdbufs =
+            dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
+
+        pools.push_back(std::move(pool));
+    }
+    /// Commits a command buffer protected by the fence and returns its handle.
+    vk::CommandBuffer Commit(VKFence& fence) {
+        const std::size_t index = CommitResource(fence);
+        const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
+        const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
+        return *pools[pool_index]->cmdbufs[sub_index];
+    }
+
+private:
+    struct Pool {
+        UniqueCommandPool handle;
+        std::vector<UniqueCommandBuffer> cmdbufs;
+    };
+
+    const VKDevice& device;
+
+    std::vector<std::unique_ptr<Pool>> pools;
+};
+
+VKResource::VKResource() = default;
+
+VKResource::~VKResource() = default;
+
+VKFence::VKFence(const VKDevice& device, UniqueFence handle)
+    : device{device}, handle{std::move(handle)} {}
+
+VKFence::~VKFence() = default;
+
+void VKFence::Wait() {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+}
+
+void VKFence::Release() {
+    is_owned = false;
+}
+
+void VKFence::Commit() {
+    is_owned = true;
+    is_used = true;
+}
+
+bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
+    if (!is_used) {
+        // If a fence is not used it's always free.
+        return true;
+    }
+    if (is_owned && !owner_wait) {
+        // The fence is still being owned (Release has not been called) and ownership wait has
+        // not been asked.
+        return false;
+    }
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    if (gpu_wait) {
+        // Wait for the fence if it has been requested.
+        dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+    } else {
+        if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
+            // The Vulkan fence is not ready and waiting was not requested, report it as busy.
+            return false;
+        }
+    }
+
+    // Tell every protected resource that this fence no longer holds it.
+    for (auto* resource : protected_resources) {
+        resource->OnFenceRemoval(this);
+    }
+    protected_resources.clear();
+
+    // Prepare the fence to be reused.
+    dev.resetFences({*handle}, dld);
+    is_used = false;
+    return true;
+}
+
+void VKFence::Protect(VKResource* resource) {
+    protected_resources.push_back(resource);
+}
+
+void VKFence::Unprotect(VKResource* resource) {
+    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
+    ASSERT(it != protected_resources.end());
+
+    resource->OnFenceRemoval(this);
+    protected_resources.erase(it);
+}
+
+VKFenceWatch::VKFenceWatch() = default;
+
+VKFenceWatch::~VKFenceWatch() {
+    if (fence) {
+        fence->Unprotect(this);
+    }
+}
+
+void VKFenceWatch::Wait() {
+    if (fence == nullptr) {
+        return;
+    }
+    fence->Wait();
+    fence->Unprotect(this);
+}
+
+void VKFenceWatch::Watch(VKFence& new_fence) {
+    Wait();
+    fence = &new_fence;
+    fence->Protect(this);
+}
+
+bool VKFenceWatch::TryWatch(VKFence& new_fence) {
+    if (fence) {
+        return false;
+    }
+    fence = &new_fence;
+    fence->Protect(this);
+    return true;
+}
+
+void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
+    ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
+    fence = nullptr;
+}
+
+VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
+
+VKFencedPool::~VKFencedPool() = default;
+
+std::size_t VKFencedPool::CommitResource(VKFence& fence) {
+    const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
+        for (std::size_t iterator = begin; iterator < end; ++iterator) {
+            if (watches[iterator]->TryWatch(fence)) {
+                // The resource is now being watched, a free resource was successfully found.
+                return iterator;
+            }
+        }
+        return {};
+    };
+    // Try to find a free resource from the hinted position to the end.
+    auto found = Search(free_iterator, watches.size());
+    if (!found) {
+        // Search from beginning to the hinted position.
+        found = Search(0, free_iterator);
+        if (!found) {
+            // Both searches failed, the pool is full; handle it.
+            const std::size_t free_resource = ManageOverflow();
+
+            // Watch will wait for the resource to be free.
+            watches[free_resource]->Watch(fence);
+            found = free_resource;
+        }
+    }
+    // Free iterator is hinted to the resource after the one that's been committed.
+    free_iterator = (*found + 1) % watches.size();
+    return *found;
+}
+
+std::size_t VKFencedPool::ManageOverflow() {
+    const std::size_t old_capacity = watches.size();
+    Grow();
+
+    // The entry at the old capacity is guaranteed to be free, since it's the first element of
+    // the freshly allocated resources.
+    return old_capacity;
+}
+
+void VKFencedPool::Grow() {
+    const std::size_t old_capacity = watches.size();
+    watches.resize(old_capacity + grow_step);
+    std::generate(watches.begin() + old_capacity, watches.end(),
+                  []() { return std::make_unique<VKFenceWatch>(); });
+    Allocate(old_capacity, old_capacity + grow_step);
+}
+
+VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
+    GrowFences(FENCES_GROW_STEP);
+    command_buffer_pool = std::make_unique<CommandBufferPool>(device);
+}
+
+VKResourceManager::~VKResourceManager() = default;
+
+VKFence& VKResourceManager::CommitFence() {
+    const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
+        const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
+        const auto hinted = fences.begin() + fences_iterator;
+        // Look for a free fence from the hint to the end, then from the beginning to the hint.
+        auto it = std::find_if(hinted, fences.end(), Tick);
+        if (it == fences.end()) {
+            it = std::find_if(fences.begin(), hinted, Tick);
+            if (it == hinted) {
+                return nullptr;
+            }
+        }
+        fences_iterator = std::distance(fences.begin(), it) + 1;
+        if (fences_iterator >= fences.size())
+            fences_iterator = 0;
+        // Take ownership of the fence that was found.
+        auto& fence = *it;
+        fence->Commit();
+        return fence.get();
+    };
+    // First attempt: do not wait for the GPU, only take fences that already report being free.
+    VKFence* found_fence = StepFences(false, false);
+    if (!found_fence) {
+        // Try again, this time waiting.
+        found_fence = StepFences(true, false);
+
+        if (!found_fence) {
+            // Allocate new fences and try again.
+            LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
+                     fences.size() + FENCES_GROW_STEP);
+
+            GrowFences(FENCES_GROW_STEP);
+            found_fence = StepFences(true, false);
+            ASSERT(found_fence != nullptr);
+        }
+    }
+    return *found_fence;
+}
+
+vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
+    return command_buffer_pool->Commit(fence);
+}
+
+void VKResourceManager::GrowFences(std::size_t new_fences_count) {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const vk::FenceCreateInfo fence_ci;
+
+    const std::size_t previous_size = fences.size();
+    fences.resize(previous_size + new_fences_count);
+
+    std::generate(fences.begin() + previous_size, fences.end(), [&]() {
+        return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
+    });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
new file mode 100644
index 000000000..08ee86fa6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -0,0 +1,180 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKResourceManager;
+
+class CommandBufferPool;
+
+/// Interface for a Vulkan resource
+class VKResource {
+public:
+    explicit VKResource();
+    virtual ~VKResource();
+
+    /**
+     * Signals the object that an owning fence has been signaled.
+     * @param signaling_fence Fence that signals its usage end.
+     */
+    virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
+};
+
+/**
+ * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
+ * They must be committed from the resource manager. Their usage flow is: commit the fence from the
+ * resource manager, protect resources with it and use them, send the fence to an execution queue
+ * and Wait for it if needed and then call Release. Used resources will automatically be signaled
+ * when they are free to be reused.
+ * @brief Protects resources for concurrent usage and signals its release.
+ */
+class VKFence {
+    friend class VKResourceManager;
+
+public:
+    explicit VKFence(const VKDevice& device, UniqueFence handle);
+    ~VKFence();
+
+    /**
+     * Waits for the fence to be signaled.
+     * @warning You must have ownership of the fence and it has to be previously sent to a queue to
+     * call this function.
+     */
+    void Wait();
+
+    /**
+     * Releases ownership of the fence. Call it after it has been sent to an execution queue.
+     * Unmanaged usage of the fence after the call will result in undefined behavior because it may
+     * be being used for something else.
+     */
+    void Release();
+
+    /// Protects a resource with this fence.
+    void Protect(VKResource* resource);
+
+    /// Removes protection for a resource.
+    void Unprotect(VKResource* resource);
+
+    /// Retrieves the Vulkan fence handle.
+    operator vk::Fence() const {
+        return *handle;
+    }
+
+private:
+    /// Take ownership of the fence.
+    void Commit();
+
+    /**
+     * Updates the fence status.
+     * @warning Waiting for the owner might soft lock the execution.
+     * @param gpu_wait Wait for the fence to be signaled by the driver.
+     * @param owner_wait When false, a fence that has not been released is never reported as free.
+     * @returns True if the fence is free. Waiting for gpu and owner will always return true.
+     */
+    bool Tick(bool gpu_wait, bool owner_wait);
+
+    const VKDevice& device;                       ///< Device handler
+    UniqueFence handle;                           ///< Vulkan fence
+    std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
+    bool is_owned = false; ///< The fence has been committed but not released yet.
+    bool is_used = false;  ///< The fence has been committed and not yet found to be free.
+};
+
+/**
+ * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
+ * resources without having to inherit VKResource from their handlers.
+ */
+class VKFenceWatch final : public VKResource {
+public:
+    explicit VKFenceWatch();
+    ~VKFenceWatch() override;
+
+    /// Waits for the watched fence and stops watching it. Does nothing when no fence is watched.
+    void Wait();
+
+    /**
+     * Waits for a previous fence and watches a new one.
+     * @param new_fence New fence to watch.
+     */
+    void Watch(VKFence& new_fence);
+
+    /**
+     * Checks if it's currently being watched and starts watching it if it's available.
+     * @returns True if a watch has started, false if it's being watched.
+     */
+    bool TryWatch(VKFence& new_fence);
+
+    void OnFenceRemoval(VKFence* signaling_fence) override;
+
+private:
+    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
+};
+
+/**
+ * Handles a pool of resources protected by fences. Manages resource overflow by allocating more
+ * resources.
+ */
+class VKFencedPool {
+public:
+    explicit VKFencedPool(std::size_t grow_step);
+    virtual ~VKFencedPool();
+
+protected:
+    /**
+     * Commits a free resource and protects it with a fence. It may allocate new resources.
+     * @param fence Fence that protects the committed resource.
+     * @returns Index of the committed resource.
+     */
+    std::size_t CommitResource(VKFence& fence);
+
+    /// Called when a chunk of resources has to be allocated, covering indices [begin, end).
+    virtual void Allocate(std::size_t begin, std::size_t end) = 0;
+
+private:
+    /// Manages pool overflow by allocating new resources.
+    std::size_t ManageOverflow();
+
+    /// Allocates a new page of resources.
+    void Grow();
+
+    std::size_t grow_step = 0;     ///< Number of new resources created after an overflow
+    std::size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
+};
+
+/**
+ * The resource manager handles all resources that can be protected with a fence avoiding
+ * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
+ */
+class VKResourceManager final {
+public:
+    explicit VKResourceManager(const VKDevice& device);
+    ~VKResourceManager();
+
+    /// Commits a fence. It has to be sent to a queue and released.
+    VKFence& CommitFence();
+
+    /// Commits an unused command buffer and protects it with a fence.
+    vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
+
+private:
+    /// Allocates new fences.
+    void GrowFences(std::size_t new_fences_count);
+
+    const VKDevice& device;          ///< Device handler.
+    std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
+    std::vector<std::unique_ptr<VKFence>> fences;           ///< Pool of fences.
+    std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
new file mode 100644
index 000000000..ed3178f09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <optional>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
+    // TODO(Rodrigo): Manage integer border colors
+    if (color == std::array<float, 4>{0, 0, 0, 0}) {
+        return vk::BorderColor::eFloatTransparentBlack;
+    } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
+        return vk::BorderColor::eFloatOpaqueBlack;
+    } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
+        return vk::BorderColor::eFloatOpaqueWhite;
+    } else {
+        return {};
+    }
+}
+
+std::size_t SamplerCacheKey::Hash() const {
+    // CityHash64 takes the buffer length in bytes, hash the whole raw descriptor.
+    return static_cast<std::size_t>(
+        Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw)));
+}
+
+bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
+    return raw == rhs.raw;
+}
+
+VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
+
+VKSamplerCache::~VKSamplerCache() = default;
+
+vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
+    const auto [slot, inserted] = cache.try_emplace(SamplerCacheKey{tsc});
+    if (inserted) {
+        // This descriptor has not been seen before, build its sampler and keep it cached.
+        slot->second = CreateSampler(tsc);
+    }
+    return *slot->second;
+}
+
+UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
+    const float max_anisotropy = tsc.GetMaxAnisotropy();
+    const bool has_anisotropy = max_anisotropy > 1.0f;
+
+    const auto border_color = tsc.GetBorderColor();
+    const auto vk_border_color = TryConvertBorderColor(border_color);
+    UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
+                         border_color[0], border_color[1], border_color[2], border_color[3]);
+
+    constexpr bool unnormalized_coords = false;
+
+    const vk::SamplerCreateInfo sampler_ci(
+        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+        MaxwellToVK::Sampler::Filter(tsc.min_filter),
+        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
+        max_anisotropy, tsc.depth_compare_enabled,
+        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
+        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
+        unnormalized_coords);
+
+    const auto& dld = device.GetDispatchLoader();
+    const auto dev = device.GetLogical();
+    return dev.createSamplerUnique(sampler_ci, nullptr, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
new file mode 100644
index 000000000..c6394dc87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
+    /// Hash value of the key; std::hash<SamplerCacheKey> forwards to this.
+    std::size_t Hash() const;
+    bool operator==(const SamplerCacheKey& rhs) const;
+    /// Logical negation of operator==.
+    bool operator!=(const SamplerCacheKey& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::SamplerCacheKey> { // Lets SamplerCacheKey be used as an std::unordered_map key.
+    std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
+        return k.Hash(); // Forwards to the key's own Hash() member.
+    }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class VKSamplerCache { // Creates Vulkan samplers from TSC entries and reuses them on repeat requests.
+public:
+    explicit VKSamplerCache(const VKDevice& device);
+    ~VKSamplerCache();
+    /// Returns the sampler for tsc, creating and caching it the first time it is requested.
+    vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
+
+private:
+    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc); // Always builds a new sampler.
+    // Members: the device samplers are created on, and the map of samplers created so far.
+    const VKDevice& device;
+    std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
new file mode 100644
index 000000000..f1fea1871
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager},
+      next_fence{&resource_manager.CommitFence()} {
+    AllocateNewContext(); // Turns the fence committed above into the current one.
+}
+
+VKScheduler::~VKScheduler() = default; // Defaulted out of line; nothing is released explicitly here.
+
+VKExecutionContext VKScheduler::GetExecutionContext() const {
+    return VKExecutionContext{current_fence, current_cmdbuf}; // Snapshot of the current pair.
+}
+
+VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore); // Ends the current command buffer and submits it with its fence.
+    current_fence->Release();   // NOTE(review): presumably gives up this scheduler's hold on the fence - confirm.
+    AllocateNewContext();       // Switches to the reserved fence and a new command buffer.
+    return GetExecutionContext();
+}
+
+VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore); // Ends the current command buffer and submits it with its fence.
+    current_fence->Wait();      // Unlike Flush, waits on the fence before continuing.
+    current_fence->Release();   // NOTE(review): presumably gives up this scheduler's hold on the fence - confirm.
+    AllocateNewContext();       // Switches to the reserved fence and a new command buffer.
+    return GetExecutionContext();
+}
+
+void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    const auto& loader = device.GetDispatchLoader();
+    current_cmdbuf.end(loader); // Recording is finished before the buffer is submitted.
+
+    // No wait semaphores; `semaphore` is signaled only when the caller passed a non-null handle.
+    const auto signal_count = semaphore ? 1u : 0u;
+    const vk::SubmitInfo info(0, nullptr, nullptr, 1, &current_cmdbuf, signal_count, &semaphore);
+    device.GetGraphicsQueue().submit({info}, *current_fence, loader);
+}
+
+void VKScheduler::AllocateNewContext() {
+    current_fence = next_fence; // Promote the fence that was reserved for this context.
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    next_fence = &resource_manager.CommitFence();
+    // Recording starts immediately and is flagged as a one-time submission.
+    const vk::CommandBufferBeginInfo begin_info{vk::CommandBufferUsageFlagBits::eOneTimeSubmit};
+    current_cmdbuf.begin(begin_info, device.GetDispatchLoader());
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
new file mode 100644
index 000000000..cfaf5376f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKExecutionContext;
+class VKFence;
+class VKResourceManager;
+
+/// The scheduler abstracts command buffer and fence management with an interface that's able to do
+/// OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
+public:
+    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+    ~VKScheduler();
+
+    /// Gets the current execution context (the current fence and command buffer).
+    [[nodiscard]] VKExecutionContext GetExecutionContext() const;
+
+    /// Sends the current execution context to the GPU. It invalidates the current execution context
+    /// and returns a new one. A non-null semaphore is passed as the submission's signal semaphore.
+    VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
+
+    /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
+    /// the current execution context and returns a new one. The semaphore is used as in Flush.
+    VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
+
+private:
+    void SubmitExecution(vk::Semaphore semaphore); // Ends the command buffer and submits it.
+    // Makes next_fence current, commits a command buffer for it and begins recording.
+    void AllocateNewContext();
+    // State of the context being recorded, plus the fence reserved for the following one.
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    vk::CommandBuffer current_cmdbuf;
+    VKFence* current_fence = nullptr;
+    VKFence* next_fence = nullptr;
+};
+
+class VKExecutionContext { // Pairs a fence with the command buffer that is recorded against it.
+    friend class VKScheduler;
+    // Only VKScheduler (a friend) can build a populated context through the private constructor.
+public:
+    VKExecutionContext() = default;
+    // Dereferences the stored pointer: must not be called on a default-constructed context.
+    VKFence& GetFence() const {
+        return *fence;
+    }
+    // Returns the command buffer handle by value.
+    vk::CommandBuffer GetCommandBuffer() const {
+        return cmdbuf;
+    }
+
+private:
+    explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
+        : fence{fence}, cmdbuf{cmdbuf} {}
+    // A default-constructed context holds a null fence pointer.
+    VKFence* fence{};
+    vk::CommandBuffer cmdbuf;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
new file mode 100644
index 000000000..e0a6f5e87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -0,0 +1,1379 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <functional>
+#include <map>
+#include <set>
+
+#include <fmt/format.h>
+
+#include <sirit/sirit.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace Vulkan::VKShader {
+
+using Sirit::Id;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::AttributeUse;
+using Tegra::Shader::Register;
+using namespace VideoCommon::Shader;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
+using Operation = const OperationNode&;
+
+// TODO(Rodrigo): Use rasterizer's value
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; // Bound applied to indirect constant buffer offsets.
+constexpr u32 STAGE_BINDING_STRIDE = 0x100; // Size of the binding range reserved per shader stage.
+
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; // Value type tags, used as template arguments by the operation emitters.
+
+struct SamplerImage { // Ids recorded for each declared sampler (filled in by DeclareSamplers).
+    Id image_type;         // Result of TypeImage for the sampler.
+    Id sampled_image_type; // Result of TypeSampledImage over image_type.
+    Id sampler;            // The UniformConstant variable declared for the sampler.
+};
+
+namespace {
+
+spv::Dim GetSamplerDim(const Sampler& sampler) { // Picks the SPIR-V image dimension for a sampler.
+    switch (const auto texture_type = sampler.GetType(); texture_type) {
+    case Tegra::Shader::TextureType::TextureCube:
+        return spv::Dim::Cube;
+    case Tegra::Shader::TextureType::Texture3D:
+        return spv::Dim::Dim3D;
+    case Tegra::Shader::TextureType::Texture2D:
+        return spv::Dim::Dim2D;
+    case Tegra::Shader::TextureType::Texture1D:
+        return spv::Dim::Dim1D;
+    default: // Anything else is reported and treated as a 2D image.
+        UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<u32>(texture_type));
+        return spv::Dim::Dim2D;
+    }
+}
+
+/// Tells whether an attribute index lies in the generic range Attribute_0..Attribute_31
+constexpr bool IsGenericAttribute(Attribute::Index attribute) {
+    return !(attribute < Attribute::Index::Attribute_0 ||
+             attribute > Attribute::Index::Attribute_31);
+}
+
+/// Returns the location of a generic attribute, counted from Attribute_0 (which is location 0)
+constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) {
+    ASSERT(IsGenericAttribute(attribute)); // Non-generic indices have no location; callers must filter them out.
+    return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
+/// Tells whether the operation's metadata marks it as precise
+bool IsPrecise(Operation operand) {
+    const auto& meta = operand.GetMeta();
+    if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
+        return arithmetic->precise;
+    }
+    if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
+        return half_arithmetic->precise;
+    }
+    // Any other kind of metadata never marks the operation as precise.
+    return false;
+}
+
+} // namespace
+
+class SPIRVDecompiler : public Sirit::Module {
+public:
+    explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage)
+        : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { // NOTE(review): 0x00010300 is presumably the SPIR-V 1.3 version word - confirm.
+        AddCapability(spv::Capability::Shader);
+        AddExtension("SPV_KHR_storage_buffer_storage_class"); // Global buffers are declared in the StorageBuffer class.
+        AddExtension("SPV_KHR_variable_pointers");
+    }
+
+    void Decompile() { // Emits all declarations, then one function that dispatches over the basic blocks.
+        AllocateBindings();
+        AllocateLabels();
+        // Declare every global the shader body refers to before the function itself is emitted.
+        DeclareVertex();
+        DeclareGeometry();
+        DeclareFragment();
+        DeclareRegisters();
+        DeclarePredicates();
+        DeclareLocalMemory();
+        DeclareInternalFlags();
+        DeclareInputAttributes();
+        DeclareOutputAttributes();
+        DeclareConstantBuffers();
+        DeclareGlobalBuffers();
+        DeclareSamplers();
+        // The whole shader body lives in a single void function; its id is kept as the entry function.
+        execute_function =
+            Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
+        Emit(OpLabel());
+
+        const u32 first_address = ir.GetBasicBlocks().begin()->first; // Needs at least one basic block.
+        const Id loop_label = OpLabel("loop");
+        const Id merge_label = OpLabel("merge");
+        const Id dummy_label = OpLabel();
+        const Id jump_label = OpLabel();
+        continue_label = OpLabel("continue");
+        // Collect the (block address, label) pairs that become the cases of the OpSwitch below.
+        std::vector<Sirit::Literal> literals;
+        std::vector<Id> branch_labels;
+        for (const auto& pair : labels) {
+            const auto [literal, label] = pair;
+            literals.push_back(literal);
+            branch_labels.push_back(label);
+        }
+
+        // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
+        // that shaders will use 20 nested SSYs and PBKs.
+        constexpr u32 FLOW_STACK_SIZE = 20;
+        const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
+        jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), // Starts at the first block.
+                                 spv::StorageClass::Function, Constant(t_uint, first_address)));
+        flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type),
+                                     spv::StorageClass::Function, ConstantNull(flow_stack_type)));
+        flow_stack_top =
+            Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0)));
+
+        Name(jmp_to, "jmp_to");
+        Name(flow_stack, "flow_stack");
+        Name(flow_stack_top, "flow_stack_top");
+        // Loop header: each iteration branches into the switch emitted below.
+        Emit(OpBranch(loop_label));
+        Emit(loop_label);
+        Emit(OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::Unroll));
+        Emit(OpBranch(dummy_label));
+        // Switch on the value loaded from jmp_to; addresses without a label take default_branch.
+        Emit(dummy_label);
+        const Id default_branch = OpLabel();
+        const Id jmp_to_load = Emit(OpLoad(t_uint, jmp_to));
+        Emit(OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone));
+        Emit(OpSwitch(jmp_to_load, default_branch, literals, branch_labels));
+        // The default case simply returns from the function.
+        Emit(default_branch);
+        Emit(OpReturn());
+        // Emit each block under its label; a block ends by branching to the next label by address.
+        for (const auto& pair : ir.GetBasicBlocks()) {
+            const auto& [address, bb] = pair;
+            Emit(labels.at(address));
+
+            VisitBasicBlock(bb);
+
+            const auto next_it = labels.lower_bound(address + 1);
+            const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
+            Emit(OpBranch(next_label));
+        }
+        // Close the selection and the loop, then terminate the function.
+        Emit(jump_label);
+        Emit(OpBranch(continue_label));
+        Emit(continue_label);
+        Emit(OpBranch(loop_label));
+        Emit(merge_label);
+        Emit(OpReturn());
+        Emit(OpFunctionEnd());
+    }
+
+    /// Collects the bindings, resources and interface variables recorded while decompiling.
+    ShaderEntries GetShaderEntries() const {
+        ShaderEntries entries;
+        entries.const_buffers_base_binding = const_buffers_base_binding;
+        entries.global_buffers_base_binding = global_buffers_base_binding;
+        entries.samplers_base_binding = samplers_base_binding;
+        for (const auto& cbuf : ir.GetConstantBuffers())
+            entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
+        for (const auto& gmem : ir.GetGlobalMemoryBases())
+            entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+        for (const auto& sampler : ir.GetSamplers())
+            entries.samplers.emplace_back(sampler);
+        for (const auto& attr : ir.GetInputAttributes()) {
+            if (IsGenericAttribute(attr.first)) { // Built-in inputs have no generic location.
+                entries.attributes.insert(GetGenericAttributeLocation(attr.first));
+            }
+        }
+        entries.clip_distances = ir.GetClipDistances();
+        entries.shader_length = ir.GetLength();
+        entries.entry_function = execute_function;
+        entries.interfaces = interfaces;
+        return entries;
+    }
+
+private:
+    using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); // Member function that emits one operation.
+    using OperationDecompilersArray =
+        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
+    // One table slot per OperationCode above; the constants below size other declarations.
+    static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
+    static constexpr u32 CBUF_STRIDE = 16; // NOTE(review): presumably the byte size of one constant buffer element - confirm.
+
+    void AllocateBindings() { // Gives each resource kind a contiguous binding range for this stage.
+        const u32 first_binding = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
+        u32 next_binding = first_binding;
+        // Reserves `count` consecutive bindings and yields the first one of the run.
+        const auto Reserve = [&next_binding](std::size_t count) {
+            const u32 reserved = next_binding;
+            next_binding = reserved + static_cast<u32>(count);
+            return reserved;
+        };
+        const_buffers_base_binding = Reserve(ir.GetConstantBuffers().size());
+        global_buffers_base_binding = Reserve(ir.GetGlobalMemoryBases().size());
+        samplers_base_binding = Reserve(ir.GetSamplers().size());
+
+        ASSERT_MSG(next_binding - first_binding < STAGE_BINDING_STRIDE,
+                   "Stage binding stride is too small");
+    }
+
+    void AllocateLabels() { // Creates one named label per basic block, keyed by the block address.
+        for (const auto& block : ir.GetBasicBlocks()) {
+            const u32 block_address = block.first;
+            labels.emplace(block_address, OpLabel(fmt::format("label_0x{:x}", block_address)));
+        }
+    }
+
+    void DeclareVertex() {
+        // Only vertex shaders get the vertex redeclarations; other stages do nothing here.
+        if (stage == ShaderStage::Vertex) {
+            DeclareVertexRedeclarations();
+        }
+    }
+
+    void DeclareGeometry() {
+        // Geometry shaders are not implemented yet; other stages do nothing here.
+        if (stage == ShaderStage::Geometry) {
+            UNIMPLEMENTED();
+        }
+    }
+
+    void DeclareFragment() {
+        if (stage != ShaderStage::Fragment) {
+            return;
+        }
+
+        // One float4 output per render target that IsRenderTargetUsed reports as used.
+        const auto render_target_count = static_cast<u32>(frag_colors.size());
+        for (u32 target = 0; target < render_target_count; ++target) {
+            if (!IsRenderTargetUsed(target)) {
+                continue;
+            }
+            const Id color = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
+            Name(color, fmt::format("frag_color{}", target));
+            Decorate(color, spv::Decoration::Location, target);
+            frag_colors[target] = color;
+            interfaces.push_back(color);
+        }
+        // The depth output is declared only when the header's output map has depth set.
+        if (header.ps.omap.depth) {
+            frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
+            Name(frag_depth, "frag_depth");
+            Decorate(frag_depth, spv::Decoration::BuiltIn,
+                     static_cast<u32>(spv::BuiltIn::FragDepth));
+            interfaces.push_back(frag_depth);
+        }
+        // Built-in inputs read by fragment shaders.
+        frag_coord = DeclareBuiltIn(spv::BuiltIn::FragCoord, spv::StorageClass::Input, t_in_float4,
+                                    "frag_coord");
+        front_facing = DeclareBuiltIn(spv::BuiltIn::FrontFacing, spv::StorageClass::Input,
+                                      t_in_bool, "front_facing");
+    }
+
+    void DeclareRegisters() { // One private float variable, initialized with v_float_zero, per used GPR.
+        for (const u32 gpr : ir.GetRegisters()) {
+            const Id variable = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
+            const Id named = Name(variable, fmt::format("gpr_{}", gpr));
+            registers.emplace(gpr, AddGlobalVariable(named));
+        }
+    }
+
+    void DeclarePredicates() { // One private bool variable, initialized with v_false, per used predicate.
+        for (const auto pred : ir.GetPredicates()) {
+            const Id variable = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            const Id named = Name(variable, fmt::format("pred_{}", static_cast<u32>(pred)));
+            predicates.emplace(pred, AddGlobalVariable(named));
+        }
+    }
+
+    void DeclareLocalMemory() { // Declares the private float array backing local memory, if any.
+        const u64 size_in_bytes = header.GetLocalMemorySize();
+        if (size_in_bytes == 0)
+            return;
+        // Each array element covers 4 bytes, so the byte size is rounded up to a multiple of 4.
+        const auto num_elements = static_cast<u32>(Common::AlignUp(size_in_bytes, 4) / 4);
+        const Id array_type = TypeArray(t_float, Constant(t_uint, num_elements));
+        const Id ptr_type = TypePointer(spv::StorageClass::Private, array_type);
+        Name(ptr_type, "LocalMemory");
+        local_memory = OpVariable(ptr_type, spv::StorageClass::Private, ConstantNull(array_type));
+        AddGlobalVariable(Name(local_memory, "local_memory"));
+    }
+
+    void DeclareInternalFlags() {
+        // Every internal flag becomes a named private bool variable initialized with v_false.
+        constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
+                                                                         "overflow"};
+        for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
+            const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
+        }
+    }
+
+    void DeclareInputAttributes() { // Declares one float4 input per generic attribute that is read.
+        for (const auto& element : ir.GetInputAttributes()) { // By reference: no per-entry copy.
+            const Attribute::Index index = element.first;
+            if (!IsGenericAttribute(index)) {
+                continue;
+            }
+            // Generic inputs of geometry shaders are not handled yet.
+            UNIMPLEMENTED_IF(stage == ShaderStage::Geometry);
+
+            const u32 location = GetGenericAttributeLocation(index);
+            const Id id = OpVariable(t_in_float4, spv::StorageClass::Input);
+            Name(AddGlobalVariable(id), fmt::format("in_attr{}", location));
+            input_attributes.emplace(index, id);
+            interfaces.push_back(id);
+
+            Decorate(id, spv::Decoration::Location, location);
+            // Interpolation decorations are only added for fragment shader inputs.
+            if (stage != ShaderStage::Fragment) {
+                continue;
+            }
+            switch (header.ps.GetAttributeUse(location)) {
+            case AttributeUse::Constant:
+                Decorate(id, spv::Decoration::Flat);
+                break;
+            case AttributeUse::ScreenLinear:
+                Decorate(id, spv::Decoration::NoPerspective);
+                break;
+            case AttributeUse::Perspective:
+                // Default
+                break;
+            default:
+                UNREACHABLE_MSG("Unused attribute being fetched");
+            }
+        }
+    }
+
+    void DeclareOutputAttributes() {
+        // Only generic attributes are declared here; any other output index is skipped.
+        for (const auto attribute : ir.GetOutputAttributes()) {
+            if (!IsGenericAttribute(attribute)) {
+                continue;
+            }
+            const u32 location = GetGenericAttributeLocation(attribute);
+            const Id variable = OpVariable(t_out_float4, spv::StorageClass::Output);
+            Name(AddGlobalVariable(variable), fmt::format("out_attr{}", location));
+            output_attributes.emplace(attribute, variable);
+            interfaces.push_back(variable);
+            Decorate(variable, spv::Decoration::Location, location);
+        }
+    }
+
+    void DeclareConstantBuffers() { // One Uniform-class variable per constant buffer in use.
+        u32 next_binding = const_buffers_base_binding;
+        for (const auto& cbuf : ir.GetConstantBuffers()) {
+            const auto cbuf_index = cbuf.first;
+            const Id variable = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform);
+            AddGlobalVariable(Name(variable, fmt::format("cbuf_{}", cbuf_index)));
+            // Bindings are handed out consecutively, in iteration order.
+            Decorate(variable, spv::Decoration::Binding, next_binding++);
+            Decorate(variable, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            constant_buffers.emplace(cbuf_index, variable);
+        }
+    }
+
+    void DeclareGlobalBuffers() { // One StorageBuffer-class variable per global memory base in use.
+        u32 next_binding = global_buffers_base_binding;
+        for (const auto& base : ir.GetGlobalMemoryBases()) {
+            const Id variable = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
+            const auto name = fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset);
+            AddGlobalVariable(Name(variable, name));
+            // Bindings are handed out consecutively, in iteration order.
+            Decorate(variable, spv::Decoration::Binding, next_binding++);
+            Decorate(variable, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            global_buffers.emplace(base, variable);
+        }
+    }
+
+    void DeclareSamplers() { // One sampled-image variable per sampler the shader uses.
+        u32 next_binding = samplers_base_binding;
+        for (const auto& sampler : ir.GetSamplers()) {
+            const auto dimension = GetSamplerDim(sampler);
+            const int is_depth = sampler.IsShadow() ? 1 : 0;
+            const int is_arrayed = sampler.IsArray() ? 1 : 0;
+            // TODO(Rodrigo): Sampled 1 indicates that the image will be used with a sampler. When
+            // SULD and SUST instructions are implemented, replace this value.
+            const int sampled = 1;
+            const Id image_type = TypeImage(t_float, dimension, is_depth, is_arrayed, false, sampled,
+                                            spv::ImageFormat::Unknown);
+            const Id sampled_image_type = TypeSampledImage(image_type);
+            const Id pointer_type =
+                TypePointer(spv::StorageClass::UniformConstant, sampled_image_type);
+            const Id variable = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
+            AddGlobalVariable(Name(variable, fmt::format("sampler_{}", sampler.GetIndex())));
+
+            // Remember the types and the variable under the sampler's index.
+            const auto sampler_index = static_cast<u32>(sampler.GetIndex());
+            sampler_images.insert({sampler_index, {image_type, sampled_image_type, variable}});
+            Decorate(variable, spv::Decoration::Binding, next_binding++);
+            Decorate(variable, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+        }
+    }
+
+    void DeclareVertexRedeclarations() { // Declares vertex built-in inputs and the PerVertex output block.
+        vertex_index = DeclareBuiltIn(spv::BuiltIn::VertexIndex, spv::StorageClass::Input,
+                                      t_in_uint, "vertex_index");
+        instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
+                                        t_in_uint, "instance_index");
+        // Find out which optional members the shader writes, so only those join the struct.
+        bool is_point_size_declared = false;
+        bool is_clip_distances_declared = false;
+        for (const auto index : ir.GetOutputAttributes()) {
+            if (index == Attribute::Index::PointSize) {
+                is_point_size_declared = true;
+            } else if (index == Attribute::Index::ClipDistances0123 ||
+                       index == Attribute::Index::ClipDistances4567) {
+                is_clip_distances_declared = true;
+            }
+        }
+        // Member types, in the same order the built-in decorations are assigned further down.
+        std::vector<Id> members;
+        members.push_back(t_float4);
+        if (is_point_size_declared) {
+            members.push_back(t_float);
+        }
+        if (is_clip_distances_declared) {
+            members.push_back(TypeArray(t_float, Constant(t_uint, 8))); // Always eight clip distances.
+        }
+
+        const Id gl_per_vertex_struct = Name(TypeStruct(members), "PerVertex");
+        Decorate(gl_per_vertex_struct, spv::Decoration::Block);
+        // Names and decorates the next struct member when `condition` holds and returns its index.
+        u32 declaration_index = 0;
+        const auto MemberDecorateBuiltIn = [&](spv::BuiltIn builtin, std::string name,
+                                               bool condition) {
+            if (!condition)
+                return u32{}; // Skipped members report index 0 and do not advance the counter.
+            MemberName(gl_per_vertex_struct, declaration_index, name);
+            MemberDecorate(gl_per_vertex_struct, declaration_index, spv::Decoration::BuiltIn,
+                           static_cast<u32>(builtin));
+            return declaration_index++;
+        };
+        // Position is always present; the other two depend on the flags computed above.
+        position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
+        point_size_index =
+            MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
+        clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
+                                                     is_clip_distances_declared);
+        // Finally declare the output variable of the struct type and expose it as an interface.
+        const Id type_pointer = TypePointer(spv::StorageClass::Output, gl_per_vertex_struct);
+        per_vertex = OpVariable(type_pointer, spv::StorageClass::Output);
+        AddGlobalVariable(Name(per_vertex, "per_vertex"));
+        interfaces.push_back(per_vertex);
+    }
+
+    void VisitBasicBlock(const NodeBlock& bb) { // Visits the block's nodes in order.
+        for (const Node node : bb) {
+            [[maybe_unused]] const Id ignored = Visit(node); // The returned id is not needed here.
+        }
+    }
+
+    Id Visit(Node node) { // Emits the code for one IR node and returns the id of its value, if any.
+        if (const auto operation = std::get_if<OperationNode>(node)) {
+            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+            const auto decompiler = operation_decompilers[operation_index];
+            if (decompiler == nullptr) {
+                UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+                return {}; // Never call through a null member function pointer.
+            }
+            return (this->*decompiler)(*operation);
+        } else if (const auto gpr = std::get_if<GprNode>(node)) {
+            const u32 index = gpr->GetIndex();
+            if (index == Register::ZeroIndex) {
+                return Constant(t_float, 0.0f); // The zero register has no backing variable.
+            }
+            return Emit(OpLoad(t_float, registers.at(index)));
+
+        } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+            return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue()));
+
+        } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+            const auto value = [&]() -> Id {
+                switch (const auto index = predicate->GetIndex(); index) {
+                case Tegra::Shader::Pred::UnusedIndex:
+                    return v_true;
+                case Tegra::Shader::Pred::NeverExecute:
+                    return v_false;
+                default:
+                    return Emit(OpLoad(t_bool, predicates.at(index)));
+                }
+            }();
+            if (predicate->IsNegated()) {
+                return Emit(OpLogicalNot(t_bool, value));
+            }
+            return value;
+
+        } else if (const auto abuf = std::get_if<AbufNode>(node)) {
+            const auto attribute = abuf->GetIndex();
+            const auto element = abuf->GetElement();
+            // Built-in attributes are handled case by case; generic ones go through the default.
+            switch (attribute) {
+            case Attribute::Index::Position:
+                if (stage != ShaderStage::Fragment) {
+                    UNIMPLEMENTED();
+                    break;
+                } else {
+                    if (element == 3) {
+                        return Constant(t_float, 1.0f);
+                    }
+                    return Emit(OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)));
+                }
+            case Attribute::Index::TessCoordInstanceIDVertexID:
+                // TODO(Subv): Find out what the values are for the first two elements when inside a
+                // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+                // shader.
+                ASSERT(stage == ShaderStage::Vertex);
+                switch (element) {
+                case 2:
+                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index)));
+                case 3:
+                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, vertex_index)));
+                }
+                UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+                return Constant(t_float, 0);
+            case Attribute::Index::FrontFacing:
+                // TODO(Subv): Find out what the values are for the other elements.
+                ASSERT(stage == ShaderStage::Fragment);
+                if (element == 3) {
+                    const Id is_front_facing = Emit(OpLoad(t_bool, front_facing));
+                    const Id true_value =
+                        BitcastTo<Type::Float>(Constant(t_int, static_cast<s32>(-1)));
+                    const Id false_value = BitcastTo<Type::Float>(Constant(t_int, 0));
+                    return Emit(OpSelect(t_float, is_front_facing, true_value, false_value));
+                }
+                UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+                return Constant(t_float, 0.0f);
+            default:
+                if (IsGenericAttribute(attribute)) {
+                    const Id pointer =
+                        AccessElement(t_in_float, input_attributes.at(attribute), element);
+                    return Emit(OpLoad(t_float, pointer));
+                }
+                break;
+            }
+            UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+
+        } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
+            const Node offset = cbuf->GetOffset();
+            const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
+            // The buffer is addressed as [index][element] with four floats per index.
+            Id buffer_index{};
+            Id buffer_element{};
+
+            if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+                // Direct access
+                const u32 offset_imm = immediate->GetValue();
+                ASSERT(offset_imm % 4 == 0);
+                buffer_index = Constant(t_uint, offset_imm / 16);
+                buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
+
+            } else if (std::holds_alternative<OperationNode>(*offset)) {
+                // Indirect access: the float index wraps at MAX_CONSTBUFFER_ELEMENTS to stay in range
+                // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
+                // emits sub-optimal code on GLSL from my testing).
+                const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
+                const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
+                const Id final_offset =
+                    Emit(OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)));
+                buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
+                buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
+
+            } else {
+                UNREACHABLE_MSG("Unmanaged offset node type");
+            }
+
+            const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
+                                                  buffer_index, buffer_element));
+            return Emit(OpLoad(t_float, pointer));
+
+        } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
+            const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress()));
+            const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress()));
+            // Byte distance from the base address, converted to an index of 4-byte elements.
+            Id offset = Emit(OpISub(t_uint, real, base));
+            offset = Emit(OpUDiv(t_uint, offset, Constant(t_uint, 4u)));
+            return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer,
+                                                           Constant(t_uint, 0u), offset))));
+
+        } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+            // It's invalid to call conditional on nested nodes, use an operation instead
+            const Id true_label = OpLabel();
+            const Id skip_label = OpLabel();
+            Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label));
+            Emit(true_label);
+
+            VisitBasicBlock(conditional->GetCode());
+
+            Emit(OpBranch(skip_label));
+            Emit(skip_label);
+            return {};
+
+        } else if (const auto comment = std::get_if<CommentNode>(node)) {
+            Name(Emit(OpUndef(t_void)), comment->GetText()); // Carries the comment text as a debug name.
+            return {};
+        }
+
+        UNREACHABLE();
+        return {};
+    }
+
+    /// Emits `func` on operand 0 and passes the result through BitcastFrom<result_type>.
+    template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
+    Id Unary(Operation operation) {
+        const Id type_id = GetTypeDefinition(result_type);
+        const Id arg0 = VisitOperand<type_a>(operation, 0);
+        const Id result = BitcastFrom<result_type>(Emit((this->*func)(type_id, arg0)));
+        if (IsPrecise(operation)) {
+            Decorate(result, spv::Decoration::NoContraction);
+        }
+        return result;
+    }
+
+    /// Emits `func` on operands 0 and 1 and passes the result through BitcastFrom<result_type>.
+    template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a>
+    Id Binary(Operation operation) {
+        const Id type_id = GetTypeDefinition(result_type);
+        const Id arg0 = VisitOperand<type_a>(operation, 0);
+        const Id arg1 = VisitOperand<type_b>(operation, 1);
+        const Id result = BitcastFrom<result_type>(Emit((this->*func)(type_id, arg0, arg1)));
+        if (IsPrecise(operation)) {
+            Decorate(result, spv::Decoration::NoContraction);
+        }
+        return result;
+    }
+
+    /// Emits `func` on operands 0 to 2 and passes the result through BitcastFrom<result_type>.
+    template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a, Type type_c = type_b>
+    Id Ternary(Operation operation) {
+        const Id type_id = GetTypeDefinition(result_type);
+        const Id arg0 = VisitOperand<type_a>(operation, 0);
+        const Id arg1 = VisitOperand<type_b>(operation, 1);
+        const Id arg2 = VisitOperand<type_c>(operation, 2);
+        const Id result = BitcastFrom<result_type>(Emit((this->*func)(type_id, arg0, arg1, arg2)));
+        if (IsPrecise(operation)) {
+            Decorate(result, spv::Decoration::NoContraction);
+        }
+        return result;
+    }
+
+    /// Emits `func` on operands 0 to 3 and passes the result through BitcastFrom<result_type>.
+    template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
+    Id Quaternary(Operation operation) {
+        const Id type_id = GetTypeDefinition(result_type);
+        const Id arg0 = VisitOperand<type_a>(operation, 0);
+        const Id arg1 = VisitOperand<type_b>(operation, 1);
+        const Id arg2 = VisitOperand<type_c>(operation, 2);
+        const Id arg3 = VisitOperand<type_d>(operation, 3);
+        const Id result =
+            BitcastFrom<result_type>(Emit((this->*func)(type_id, arg0, arg1, arg2, arg3)));
+        if (IsPrecise(operation)) {
+            Decorate(result, spv::Decoration::NoContraction);
+        }
+        return result;
+    }
+
+    /// Stores operand 1 into the register, output attribute or local memory slot named by
+    /// operand 0. Unsupported destinations are reported and no store is emitted for them.
+    Id Assign(Operation operation) {
+        const Node dest = operation[0];
+        Id target{};
+        if (const auto gpr = std::get_if<GprNode>(dest)) {
+            if (gpr->GetIndex() == Register::ZeroIndex) {
+                // Writing to Register::ZeroIndex is a no op
+                return {};
+            }
+            target = registers.at(gpr->GetIndex());
+        } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+            const auto element = abuf->GetElement();
+            switch (const auto attribute = abuf->GetIndex(); attribute) {
+            case Attribute::Index::Position:
+                target = AccessElement(t_out_float, per_vertex, position_index, element);
+                break;
+            case Attribute::Index::PointSize:
+                target = AccessElement(t_out_float, per_vertex, point_size_index);
+                break;
+            case Attribute::Index::ClipDistances0123:
+                target = AccessElement(t_out_float, per_vertex, clip_distances_index, element);
+                break;
+            case Attribute::Index::ClipDistances4567:
+                target = AccessElement(t_out_float, per_vertex, clip_distances_index, element + 4);
+                break;
+            default:
+                if (!IsGenericAttribute(attribute)) {
+                    UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
+                                      static_cast<u32>(attribute));
+                    return {}; // never store through an empty pointer Id
+                }
+                target = AccessElement(t_out_float, output_attributes.at(attribute), element);
+                break;
+            }
+        } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+            Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress()));
+            address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4)));
+            target = Emit(OpAccessChain(t_prv_float, local_memory, {address}));
+        } else {
+            UNIMPLEMENTED_MSG("Unhandled assignment destination");
+            return {};
+        }
+        Emit(OpStore(target, Visit(operation[1])));
+        return {};
+    }
+
+    Id HNegate([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id HMergeF32([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id HMergeH0([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id HMergeH1([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id HPack2([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    /// Stores operand 1 into the predicate or internal flag named by operand 0. Unsupported
+    /// destinations are reported and no store is emitted for them.
+    Id LogicalAssign(Operation operation) {
+        const Node dest = operation[0];
+        Id target{};
+        if (const auto pred = std::get_if<PredicateNode>(dest)) {
+            ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+            case Tegra::Shader::Pred::UnusedIndex:
+                // Writing to these predicates is a no-op
+                return {};
+            }
+            target = predicates.at(index);
+        } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+            target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
+        } else {
+            UNIMPLEMENTED_MSG("Unhandled logical assignment destination");
+            return {};
+        }
+        Emit(OpStore(target, Visit(operation[1])));
+        return {};
+    }
+
+    Id LogicalPick2([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id LogicalAll2([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id LogicalAny2([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id GetTextureSampler(Operation operation) { // Loads the sampler chosen by the MetaTexture.
+        const auto texture_meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const auto image = sampler_images.at(static_cast<u32>(texture_meta->sampler.GetIndex()));
+        return Emit(OpLoad(image.sampled_image_type, image.sampler));
+    }
+
+    Id GetTextureImage(Operation operation) { // Applies OpImage to the loaded sampler.
+        const auto texture_meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const auto image = sampler_images.at(static_cast<u32>(texture_meta->sampler.GetIndex()));
+        return Emit(OpImage(image.image_type, GetTextureSampler(operation)));
+    }
+
+    Id GetTextureCoordinates(Operation operation) { // operands, then array index, then depth
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta); // same precondition GetTextureElement checks before dereferencing
+        std::vector<Id> coords;
+        for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
+            coords.push_back(Visit(operation[i]));
+        }
+        if (meta->sampler.IsArray()) {
+            const Id array_integer = BitcastTo<Type::Int>(Visit(meta->array));
+            coords.push_back(Emit(OpConvertSToF(t_float, array_integer)));
+        }
+        if (meta->sampler.IsShadow()) {
+            coords.push_back(Visit(meta->depth_compare));
+        }
+        const std::array<Id, 4> t_float_lut = {nullptr, t_float2, t_float3, t_float4};
+        return coords.size() == 1 // a single coordinate is returned as a scalar
+                   ? coords[0]
+                   : Emit(OpCompositeConstruct(t_float_lut.at(coords.size() - 1), coords));
+    }
+
+    Id GetTextureElement(Operation operation, Id sample_value) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta); // the operation is expected to carry MetaTexture
+        return Emit(OpCompositeExtract(t_float, sample_value, meta->element)); // one float member
+    }
+
+    Id Texture(Operation operation) {
+        const Id sampler = GetTextureSampler(operation); // sequenced first: fixed emission order
+        return GetTextureElement(operation, Emit(OpImageSampleImplicitLod(
+            t_float4, sampler, GetTextureCoordinates(operation))));
+    }
+
+    Id TextureLod(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const Id sampler = GetTextureSampler(operation);    // emitted first
+        const Id coords = GetTextureCoordinates(operation); // then the coordinates, then the LOD
+        return GetTextureElement(operation, Emit(OpImageSampleExplicitLod(
+            t_float4, sampler, coords, spv::ImageOperandsMask::Lod, Visit(meta->lod))));
+    }
+
+    /// Emits an image gather (Dref variant for shadow samplers) and returns the meta's element.
+    Id TextureGather(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+        const auto coords = GetTextureCoordinates(operation);
+        const Id sampler = GetTextureSampler(operation); // hoisted: fixes the emission order
+        Id texture;
+        if (meta->sampler.IsShadow()) {
+            texture = Emit(OpImageDrefGather(t_float4, sampler, coords, Visit(meta->component)));
+        } else {
+            u32 component_value = 0;
+            if (meta->component) {
+                const auto component = std::get_if<ImmediateNode>(meta->component);
+                ASSERT_MSG(component, "Component is not an immediate value");
+                component_value = component->GetValue();
+            }
+            texture = Emit(OpImageGather(t_float4, sampler, coords,
+                                         Constant(t_uint, component_value)));
+        }
+        return GetTextureElement(operation, texture);
+    }
+
+    /// Returns the level count (element 3) or one size component; scalar 1D sizes are used as is.
+    Id TextureQueryDimensions(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+        const auto image_id = GetTextureImage(operation);
+        AddCapability(spv::Capability::ImageQuery);
+        if (meta->element == 3) {
+            return BitcastTo<Type::Float>(Emit(OpImageQueryLevels(t_int, image_id)));
+        }
+
+        const Id lod = VisitOperand<Type::Uint>(operation, 0);
+        const std::size_t coords_count = [&]() -> std::size_t {
+            switch (const auto type = meta->sampler.GetType(); type) {
+            case Tegra::Shader::TextureType::Texture1D:
+                return 1;
+            case Tegra::Shader::TextureType::Texture2D:
+            case Tegra::Shader::TextureType::TextureCube:
+                return 2;
+            case Tegra::Shader::TextureType::Texture3D:
+                return 3;
+            default:
+                UNREACHABLE_MSG("Invalid texture type={}", static_cast<u32>(type));
+                return 2;
+            }
+        }();
+        if (meta->element >= coords_count) {
+            return Constant(t_float, 0.0f); // elements past the texture's dimensions read as zero
+        }
+        const std::array<Id, 3> types = {t_int, t_int2, t_int3};
+        const Id sizes = Emit(OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod));
+        const Id size =
+            coords_count == 1 ? sizes : Emit(OpCompositeExtract(t_int, sizes, meta->element));
+        return BitcastTo<Type::Float>(size);
+    }
+
+    Id TextureQueryLod([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id TexelFetch([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id Branch(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        UNIMPLEMENTED_IF(!target);
+        // Record the immediate destination in jmp_to, then jump to continue_label.
+        Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
+        BranchingOp([this] { Emit(OpBranch(continue_label)); });
+        return Id{};
+    }
+
+    Id PushFlowStack(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        ASSERT(target);
+        // Read the current top of the stack and compute the index that follows it.
+        const Id top = Emit(OpLoad(t_uint, flow_stack_top));
+        const Id incremented_top = Emit(OpIAdd(t_uint, top, Constant(t_uint, 1)));
+        const Id slot = Emit(OpAccessChain(t_func_uint, flow_stack, top));
+        // Write the immediate value into the slot, then store the new top.
+        Emit(OpStore(slot, Constant(t_uint, target->GetValue())));
+        Emit(OpStore(flow_stack_top, incremented_top));
+        return Id{};
+    }
+
+    Id PopFlowStack(Operation operation) {
+        const Id top = Emit(OpLoad(t_uint, flow_stack_top));
+        const Id new_top = Emit(OpISub(t_uint, top, Constant(t_uint, 1)));
+        const Id slot = Emit(OpAccessChain(t_func_uint, flow_stack, new_top));
+        const Id popped_value = Emit(OpLoad(t_uint, slot));
+        // Shrink the stack, copy the popped value into jmp_to and branch to continue_label.
+        Emit(OpStore(flow_stack_top, new_top));
+        Emit(OpStore(jmp_to, popped_value));
+        BranchingOp([this] { Emit(OpBranch(continue_label)); });
+        return Id{};
+    }
+
+    /// Emits the stage epilogue (vertex depth remap or fragment output stores), then OpReturn.
+    Id Exit(Operation operation) {
+        switch (stage) {
+        case ShaderStage::Vertex: {
+            // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
+            // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
+            // An access chain must yield a pointer type: address component 2 of position directly.
+            const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, u32{2});
+            Id depth = Emit(OpLoad(t_float, z_pointer));
+            depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
+            depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
+            Emit(OpStore(z_pointer, depth));
+            break;
+        }
+        case ShaderStage::Fragment: {
+            const auto SafeGetRegister = [&](u32 reg) {
+                // TODO(Rodrigo): Replace with contains once C++20 releases
+                if (const auto it = registers.find(reg); it != registers.end()) {
+                    return Emit(OpLoad(t_float, it->second));
+                }
+                return Constant(t_float, 0.0f);
+            };
+            UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
+                                 "Sample mask write is unimplemented");
+            // TODO(Rodrigo): Alpha testing
+
+            // Write the color outputs using the data in the shader registers, disabled
+            // rendertargets/components are skipped in the register assignment.
+            u32 current_reg = 0;
+            for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+                // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
+                for (u32 component = 0; component < 4; ++component) {
+                    if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
+                        Emit(OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
+                                     SafeGetRegister(current_reg)));
+                        ++current_reg;
+                    }
+                }
+            }
+            if (header.ps.omap.depth) {
+                // The depth output is always 2 registers after the last color output, and
+                // current_reg already contains one past the last color register.
+                Emit(OpStore(frag_depth, SafeGetRegister(current_reg + 1)));
+            }
+            break;
+        }
+        }
+
+        BranchingOp([&]() { Emit(OpReturn()); });
+        return {};
+    }
+
+    Id Discard([[maybe_unused]] Operation operation) {
+        BranchingOp([this] { Emit(OpKill()); }); // OpKill goes inside BranchingOp's taken block
+        return Id{};
+    }
+
+    Id EmitVertex([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id EndPrimitive([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id YNegate([[maybe_unused]] Operation operation) {
+        UNIMPLEMENTED(); // This operation has no SPIR-V translation here yet.
+        return Id{};
+    }
+
+    Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
+                      const std::string& name) {
+        const Id variable = OpVariable(type, storage);
+        Decorate(variable, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
+        AddGlobalVariable(Name(variable, name)); // named, then registered as a global variable
+        interfaces.emplace_back(variable);       // also recorded in the interface list
+        return variable;
+    }
+
+    /// Returns true when at least one of the four color components of `rt` has output enabled.
+    bool IsRenderTargetUsed(u32 rt) const {
+        bool used = false;
+        for (u32 component = 0; component < 4 && !used; ++component) {
+            used = header.ps.IsColorComponentOutputEnabled(rt, component);
+        }
+        return used;
+    }
+
+    /// Emits an access chain typed `pointer_type` into `composite`; each element is passed as a
+    /// uint constant index, in the order given.
+    template <typename... Args>
+    Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
+        std::vector<Id> indices;
+        // Comma fold: evaluated left to right, one Constant per element.
+        (indices.push_back(Constant(t_uint, elements_)), ...);
+
+        return Emit(OpAccessChain(pointer_type, composite, indices));
+    }
+
+    /// Visits operand `operand_index` and bitcasts it to int or uint when `type` asks for it;
+    /// bool and float operands are returned exactly as visited.
+    template <Type type>
+    Id VisitOperand(Operation operation, std::size_t operand_index) {
+        const Id value = Visit(operation[operand_index]);
+        if constexpr (type == Type::Bool || type == Type::Bool2 || type == Type::Float) {
+            return value;
+        } else if constexpr (type == Type::Int) {
+            return Emit(OpBitcast(t_int, value));
+        } else if constexpr (type == Type::Uint) {
+            return Emit(OpBitcast(t_uint, value));
+        } else {
+            if constexpr (type == Type::HalfFloat) {
+                UNIMPLEMENTED();
+            }
+            UNREACHABLE();
+            return value;
+        }
+    }
+
+    /// Bitcasts int and uint values to float; bool and float values are returned unchanged.
+    template <Type type>
+    Id BitcastFrom(Id value) {
+        if constexpr (type == Type::Int || type == Type::Uint) {
+            return Emit(OpBitcast(t_float, value));
+        } else if constexpr (type == Type::Bool || type == Type::Bool2 || type == Type::Float) {
+            return value;
+        } else {
+            // Half floats are flagged as unimplemented and the input is handed back.
+            if constexpr (type == Type::HalfFloat) {
+                UNIMPLEMENTED();
+            }
+            UNREACHABLE();
+            return value;
+        }
+    }
+
+    /// Bitcasts `value` to the float, int or uint type selected by `type`.
+    template <Type type>
+    Id BitcastTo(Id value) {
+        if constexpr (type == Type::Float) {
+            return Emit(OpBitcast(t_float, value));
+        } else if constexpr (type == Type::Int) {
+            return Emit(OpBitcast(t_int, value));
+        } else if constexpr (type == Type::Uint) {
+            return Emit(OpBitcast(t_uint, value));
+        } else {
+            // Booleans must not reach OpBitcast (no fallthrough into the float case any more).
+            if constexpr (type == Type::HalfFloat) {
+                UNIMPLEMENTED();
+            }
+            UNREACHABLE();
+            return value;
+        }
+    }
+
+    /// Maps an IR Type to the matching declared type Id; HalfFloat is flagged as unimplemented.
+    Id GetTypeDefinition(Type type) {
+        if (type == Type::Bool) {
+            return t_bool;
+        } else if (type == Type::Bool2) {
+            return t_bool2;
+        } else if (type == Type::Float) {
+            return t_float;
+        } else if (type == Type::Int) {
+            return t_int;
+        } else if (type == Type::Uint) {
+            return t_uint;
+        } else if (type == Type::HalfFloat) {
+            UNIMPLEMENTED();
+        }
+        UNREACHABLE();
+        return {};
+    }
+
+    void BranchingOp(std::function<void()> call) {
+        // Emits `call` inside a selection on v_true; emission then resumes at the merge label.
+        const Id taken_label = OpLabel();
+        const Id merge_label = OpLabel();
+        Emit(OpSelectionMerge(merge_label, spv::SelectionControlMask::Flatten));
+        Emit(OpBranchConditional(v_true, taken_label, merge_label, 1, 0));
+        Emit(taken_label);
+        call();
+        Emit(merge_label);
+    }
+
+    static constexpr OperationDecompilersArray operation_decompilers = {
+        &SPIRVDecompiler::Assign,
+        // NOTE(review): entry order presumably mirrors the IR operation codes - confirm.
+        &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
+                                  Type::Float>,
+        // Float arithmetic, math functions, rounding and integer-to-float conversions
+        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
+        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
+        &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
+        // Signed integer arithmetic
+        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
+        // Signed integer conversions, shifts and bitwise operations
+        &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
+        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
+        &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
+        // Unsigned integer operations
+        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
+        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
+        &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
+        // Half float operations (the dedicated H* handlers are unimplemented stubs)
+        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
+        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
+        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
+        &SPIRVDecompiler::HNegate,
+        &SPIRVDecompiler::HMergeF32,
+        &SPIRVDecompiler::HMergeH0,
+        &SPIRVDecompiler::HMergeH1,
+        &SPIRVDecompiler::HPack2,
+        // Logical (boolean) operations
+        &SPIRVDecompiler::LogicalAssign,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
+        &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
+        &SPIRVDecompiler::LogicalPick2,
+        &SPIRVDecompiler::LogicalAll2,
+        &SPIRVDecompiler::LogicalAny2,
+        // Float comparisons
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool>,
+        // Signed integer comparisons
+        &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
+        // Unsigned integer comparisons
+        &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
+        // Half float comparisons
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,
+        // Texture operations
+        &SPIRVDecompiler::Texture,
+        &SPIRVDecompiler::TextureLod,
+        &SPIRVDecompiler::TextureGather,
+        &SPIRVDecompiler::TextureQueryDimensions,
+        &SPIRVDecompiler::TextureQueryLod,
+        &SPIRVDecompiler::TexelFetch,
+        // Flow control
+        &SPIRVDecompiler::Branch,
+        &SPIRVDecompiler::PushFlowStack,
+        &SPIRVDecompiler::PopFlowStack,
+        &SPIRVDecompiler::Exit,
+        &SPIRVDecompiler::Discard,
+        // Vertex emission (both handlers are unimplemented stubs)
+        &SPIRVDecompiler::EmitVertex,
+        &SPIRVDecompiler::EndPrimitive,
+        // Y negate (handler is an unimplemented stub)
+        &SPIRVDecompiler::YNegate,
+    };
+
+    const ShaderIR& ir;
+    const ShaderStage stage;
+    const Tegra::Shader::Header header;
+    // Basic types, each tagged through Name(). Later declarations build on earlier ones.
+    const Id t_void = Name(TypeVoid(), "void");
+
+    const Id t_bool = Name(TypeBool(), "bool");
+    const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
+
+    const Id t_int = Name(TypeInt(32, true), "int");
+    const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
+    const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
+    const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
+
+    const Id t_uint = Name(TypeInt(32, false), "uint");
+    const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
+    const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
+    const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
+
+    const Id t_float = Name(TypeFloat(32), "float");
+    const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
+    const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
+    const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
+    // Pointer types, grouped by storage class (Private, Function, Input, Output).
+    const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
+    const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
+
+    const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
+
+    const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
+    const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
+    const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
+    const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
+
+    const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
+    const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
+    // Constant buffer types: a float4 array inside a Block struct, in Uniform storage.
+    const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
+    const Id t_cbuf_array =
+        Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"),
+                 spv::Decoration::ArrayStride, CBUF_STRIDE);
+    const Id t_cbuf_struct = MemberDecorate(
+        Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct);
+    // Global memory types: a runtime float array inside a Block struct, in StorageBuffer storage.
+    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
+    const Id t_gmem_array =
+        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4u), "GmemArray");
+    const Id t_gmem_struct = MemberDecorate(
+        Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
+    // Shared constants.
+    const Id v_float_zero = Constant(t_float, 0.0f);
+    const Id v_true = ConstantTrue(t_bool);
+    const Id v_false = ConstantFalse(t_bool);
+    // Ids looked up by the operation handlers; presumably filled in outside this excerpt.
+    Id per_vertex{};
+    std::map<u32, Id> registers;
+    std::map<Tegra::Shader::Pred, Id> predicates;
+    Id local_memory{};
+    std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
+    std::map<Attribute::Index, Id> input_attributes;
+    std::map<Attribute::Index, Id> output_attributes;
+    std::map<u32, Id> constant_buffers;
+    std::map<GlobalMemoryBase, Id> global_buffers;
+    std::map<u32, SamplerImage> sampler_images;
+    // NOTE(review): presumably stage input/output variables (Exit writes frag_colors/frag_depth).
+    Id instance_index{};
+    Id vertex_index{};
+    std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
+    Id frag_depth{};
+    Id frag_coord{};
+    Id front_facing{};
+    // Member indices used when building access chains into per_vertex.
+    u32 position_index{};
+    u32 point_size_index{};
+    u32 clip_distances_index{};
+
+    std::vector<Id> interfaces;
+
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+    // Control flow state; Branch, PushFlowStack and PopFlowStack use jmp_to and the flow stack.
+    Id execute_function{};
+    Id jmp_to{};
+    Id flow_stack_top{};
+    Id flow_stack{};
+    Id continue_label{};
+    std::map<u32, Id> labels;
+};
+
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) {
+    auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage);
+    decompiler->Decompile(); // NOTE(review): below, std::move only casts; entries are read first.
+    return {std::move(decompiler), decompiler->GetShaderEntries()};
+}
+
+} // namespace Vulkan::VKShader
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
new file mode 100644
index 000000000..329d8fa38
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+}
+
+namespace Vulkan::VKShader {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+using SamplerEntry = VideoCommon::Shader::Sampler;
+
+constexpr u32 DESCRIPTOR_SET = 0;
+
+class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
+public:
+    explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index)
+        : VideoCommon::Shader::ConstBuffer{entry}, index{index} {}
+    /// Returns the index that was passed to the constructor.
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+private:
+    u32 index{}; ///< Index stored next to the copied ConstBuffer state.
+};
+
+class GlobalBufferEntry {
+public:
+    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+    /// Returns the constant buffer index given at construction.
+    u32 GetCbufIndex() const {
+        return cbuf_index;
+    }
+    /// Returns the constant buffer offset given at construction.
+    u32 GetCbufOffset() const {
+        return cbuf_offset;
+    }
+
+private:
+    u32 cbuf_index{};  ///< Constant buffer index, as passed to the constructor.
+    u32 cbuf_offset{}; ///< Constant buffer offset, as passed to the constructor.
+};
+
+struct ShaderEntries {
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+    std::vector<ConstBufferEntry> const_buffers;
+    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<SamplerEntry> samplers;
+    std::set<u32> attributes;
+    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+    std::size_t shader_length{};
+    Sirit::Id entry_function{};
+    std::vector<Sirit::Id> interfaces;
+};
+
+using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
+
+} // namespace Vulkan::VKShader
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                                                                   pipeline_stage} {
+    CreateBuffers(memory_manager, usage);
+    ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    const bool fits = offset + size <= buffer_size;
+    if (!fits) {
+        // Wrap around: remember how many watches were in use so Send() can wait on them, then
+        // restart both the watch counter and the write offset from zero.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+    // A pending invalidation mark means Reserve() restarted the buffer from offset zero.
+    if (invalidation_mark) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
+                      [&](auto& resource) { resource->Wait(); });
+        invalidation_mark = std::nullopt;
+    }
+
+    if (used_watches + 1 >= watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+    // Add a watch for this allocation.
+    watches[used_watches++]->Watch(exctx.GetFence());
+    // Move the write offset past the bytes that were sent; they may be fewer than reserved.
+    offset += size;
+
+    return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    const std::size_t previous_size = watches.size();
+    watches.resize(previous_size + grow_size);
+    std::generate(watches.begin() + previous_size, watches.end(),
+                  []() { return std::make_unique<VKFenceWatch>(); });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    ~VKStreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve, it must not exceed the total size of the stream buffer.
+     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+     * offset and a boolean that's true when buffer has been invalidated.
+     */
+    std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+    /// Commits "size" bytes of the last reservation, flushing and waiting after an invalidation.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+    /// Returns the handle of the underlying Vulkan buffer.
+    vk::Buffer GetBuffer() const {
+        return *buffer;
+    }
+
+private:
+    /// Creates the Vulkan buffer handle and commits the memory it requires.
+    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::size_t grow_size);
+
+    const VKDevice& device;                      ///< Vulkan device manager.
+    VKScheduler& scheduler;                      ///< Command scheduler.
+    const u64 buffer_size;                       ///< Total size of the stream buffer.
+    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
+    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+    UniqueBuffer buffer;   ///< Vulkan buffer handle.
+    VKMemoryCommit commit; ///< Memory commit backing the buffer.
+    u8* mapped_pointer{};  ///< Pointer to the host visible commit.
+
+    u64 offset{};      ///< Offset where the next reservation starts.
+    u64 mapped_size{}; ///< Size of the last reservation.
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< All allocated watches.
+    std::size_t used_watches{}; ///< Count of watches in use, reset on invalidation.
+    std::optional<std::size_t>
+        invalidation_mark{}; ///< Number of watches to wait on after an invalidation.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
new file mode 100644
index 000000000..08279e562
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -0,0 +1,210 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/frontend/framebuffer_layout.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
+    // The preferred pair is returned for a lone eUndefined entry or when it is listed itself;
+    // in every other case the first listed entry is used.
+    const vk::SurfaceFormatKHR best{vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
+    const bool unrestricted = formats.size() == 1 && formats[0].format == vk::Format::eUndefined;
+    const bool listed = std::any_of(formats.begin(), formats.end(), [&best](const auto& entry) {
+        return entry.format == best.format && entry.colorSpace == best.colorSpace;
+    });
+    return (unrestricted || listed) ? best : formats[0];
+}
+
+vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) {
+    // Mailbox does not block the application the way FIFO (vsync) does, so it is preferred.
+    // FIFO is used whenever mailbox is not among the offered modes.
+    constexpr auto mailbox = vk::PresentModeKHR::eMailbox;
+    const bool has_mailbox = std::find(modes.begin(), modes.end(), mailbox) != modes.end();
+    return has_mailbox ? mailbox : vk::PresentModeKHR::eFifo;
+}
+
+vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+                              u32 height) {
+    // The maximum u32 value marks an undefined current extent; any other value is used as is.
+    if (capabilities.currentExtent.width != std::numeric_limits<u32>::max()) {
+        return capabilities.currentExtent;
+    }
+    // Cap the requested size at the maximum first, then raise it to the minimum if needed.
+    const auto& lowest = capabilities.minImageExtent;
+    const auto& highest = capabilities.maxImageExtent;
+    const u32 fitted_width = std::max(lowest.width, std::min(highest.width, width));
+    const u32 fitted_height = std::max(lowest.height, std::min(highest.height, height));
+    return {fitted_width, fitted_height};
+}
+} // namespace
+
+VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
+    : surface{surface}, device{device} {}
+
+VKSwapchain::~VKSwapchain() = default;
+
+void VKSwapchain::Create(u32 width, u32 height) {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const auto physical_device = device.GetPhysical();
+    // When the surface reports a zero maximum extent, return and leave the current state as is.
+    const vk::SurfaceCapabilitiesKHR capabilities{
+        physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
+    if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
+        return;
+    }
+    // Wait for the device to become idle before the previous swapchain objects are destroyed.
+    dev.waitIdle(dld);
+    Destroy();
+
+    CreateSwapchain(capabilities, width, height);
+    CreateSemaphores();
+    CreateImageViews();
+    // One fence slot per image; slots added by the resize start out empty.
+    fences.resize(image_count, nullptr);
+}
+
+void VKSwapchain::AcquireNextImage() {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+    dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
+                            *present_semaphores[frame_index], {}, &image_index, dld);
+    // If Present() left a fence on the acquired image, wait for it and release it.
+    if (auto& fence = fences[image_index]; fence) {
+        fence->Wait();
+        fence->Release();
+        fence = nullptr;
+    }
+}
+
+bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
+    const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
+    const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
+    const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
+    const auto& dld{device.GetDispatchLoader()};
+    const auto present_queue{device.GetPresentQueue()};
+    bool recreated = false;
+    // Wait on the present semaphore, plus the render semaphore when one was supplied.
+    const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
+                                          &swapchain.get(), &image_index, {});
+    switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
+    case vk::Result::eSuccess:
+        break;
+    case vk::Result::eErrorOutOfDateKHR:
+        if (current_width > 0 && current_height > 0) {
+            Create(current_width, current_height);
+            recreated = true;
+        }
+        break;
+    default:
+        LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
+                     vk::to_string(result));
+        UNREACHABLE();
+    }
+    // Remember the fence for this image so AcquireNextImage() can wait on it before reuse.
+    ASSERT(fences[image_index] == nullptr);
+    fences[image_index] = &fence;
+    frame_index = (frame_index + 1) % image_count;
+    return recreated;
+}
+
+bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
+    // TODO(Rodrigo): Handle framebuffer pixel format changes
+    return framebuffer.width != current_width || framebuffer.height != current_height;
+}
+
+void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+                                  u32 height) {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+    const auto physical_device{device.GetPhysical()};
+
+    const std::vector<vk::SurfaceFormatKHR> formats{
+        physical_device.getSurfaceFormatsKHR(surface, dld)};
+
+    const std::vector<vk::PresentModeKHR> present_modes{
+        physical_device.getSurfacePresentModesKHR(surface, dld)};
+
+    const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
+    const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
+    extent = ChooseSwapExtent(capabilities, width, height);
+    // Record the size that was actually chosen; it can differ from the requested one.
+    current_width = extent.width;
+    current_height = extent.height;
+    // Ask for one image more than the minimum, capped at the maximum when one is reported.
+    u32 requested_image_count{capabilities.minImageCount + 1};
+    if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
+        requested_image_count = capabilities.maxImageCount;
+    }
+
+    vk::SwapchainCreateInfoKHR swapchain_ci(
+        {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
+        extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
+        capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
+        {});
+    // Images are shared concurrently only when graphics and present use different families.
+    const u32 graphics_family{device.GetGraphicsFamily()};
+    const u32 present_family{device.GetPresentFamily()};
+    const std::array<u32, 2> queue_indices{graphics_family, present_family};
+    if (graphics_family != present_family) {
+        swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent;
+        swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
+        swapchain_ci.pQueueFamilyIndices = queue_indices.data();
+    } else {
+        swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
+    }
+
+    swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
+    // The image count is taken from the created swapchain, not from the requested count.
+    images = dev.getSwapchainImagesKHR(*swapchain, dld);
+    image_count = static_cast<u32>(images.size());
+    image_format = surface_format.format;
+}
+
+void VKSwapchain::CreateSemaphores() {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+    // One present semaphore per swapchain image; every slot receives a new semaphore.
+    present_semaphores.resize(image_count);
+    for (auto& semaphore : present_semaphores) {
+        semaphore = dev.createSemaphoreUnique({}, nullptr, dld);
+    }
+}
+
+void VKSwapchain::CreateImageViews() {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+    // One 2D color view per swapchain image, covering mip level 0 and array layer 0 only.
+    image_views.resize(image_count);
+    for (std::size_t i = 0; i < image_count; i++) {
+        const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D,
+                                                    image_format, {},
+                                                    {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
+        image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
+    }
+}
+
+void VKSwapchain::Destroy() {
+    frame_index = 0;
+    present_semaphores.clear();
+    framebuffers.clear();
+    image_views.clear();
+    swapchain.reset();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
new file mode 100644
index 000000000..2ad84f185
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Layout {
+struct FramebufferLayout;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+
+class VKSwapchain {
+public:
+    explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
+    ~VKSwapchain();
+
+    /// Creates (or recreates) the swapchain with a given size.
+    void Create(u32 width, u32 height);
+
+    /// Acquires the next image in the swapchain, waits as needed.
+    void AcquireNextImage();
+
+    /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
+    /// recreated. Takes responsibility for the ownership of fence.
+    bool Present(vk::Semaphore render_semaphore, VKFence& fence);
+
+    /// Returns true when the layout's width or height differs from the current swapchain size.
+    bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
+    /// Returns the extent chosen when the swapchain was created.
+    const vk::Extent2D& GetSize() const {
+        return extent;
+    }
+    /// Returns the number of images owned by the swapchain.
+    u32 GetImageCount() const {
+        return image_count;
+    }
+    /// Returns the index of the image obtained by the last AcquireNextImage() call.
+    u32 GetImageIndex() const {
+        return image_index;
+    }
+    /// Returns the swapchain image stored at "index" (unchecked).
+    vk::Image GetImageIndex(u32 index) const {
+        return images[index];
+    }
+    /// Returns the image view stored at "index" (unchecked).
+    vk::ImageView GetImageViewIndex(u32 index) const {
+        return *image_views[index];
+    }
+    /// Returns the format the swapchain images were created with.
+    vk::Format GetImageFormat() const {
+        return image_format;
+    }
+
+private:
+    void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
+    void CreateSemaphores();
+    void CreateImageViews();
+    /// Drops the swapchain together with its semaphores, framebuffers and image views.
+    void Destroy();
+
+    const vk::SurfaceKHR surface;
+    const VKDevice& device;
+
+    UniqueSwapchainKHR swapchain;
+
+    u32 image_count{};                               ///< Number of swapchain images.
+    std::vector<vk::Image> images;                   ///< Images owned by the swapchain.
+    std::vector<UniqueImageView> image_views;        ///< One view per swapchain image.
+    std::vector<UniqueFramebuffer> framebuffers;     ///< Cleared whenever the swapchain is destroyed.
+    std::vector<VKFence*> fences;                    ///< Fence of the last present per image, or null.
+    std::vector<UniqueSemaphore> present_semaphores; ///< One present semaphore per image.
+
+    u32 image_index{}; ///< Index written by the last image acquisition.
+    u32 frame_index{}; ///< Index of the present semaphore in use, advanced by Present().
+
+    vk::Format image_format{}; ///< Format of the swapchain images.
+    vk::Extent2D extent{};     ///< Extent chosen at swapchain creation.
+
+    u32 current_width{};  ///< Width of the chosen extent.
+    u32 current_height{}; ///< Height of the chosen extent.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
new file mode 100644
index 000000000..e4c438792
--- /dev/null
+++ b/src/video_core/shader/decode.cpp
@@ -0,0 +1,209 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <set>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+namespace {
+
+/// Merges the exit methods of two branches that may both be taken: an undetermined side
+/// defers to the other one, and two different known methods become ExitMethod::Conditional.
+constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
+    const bool a_known = a != ExitMethod::Undetermined;
+    const bool b_known = b != ExitMethod::Undetermined;
+
+    if (a_known && b_known) {
+        return a == b ? a : ExitMethod::Conditional;
+    }
+
+    // At most one side is known at this point; use it, or b when neither is.
+    return a_known ? a : b;
+}
+
+/**
+ * Tells whether the instruction at "offset" sits in a 'sched' slot: counting from main_offset,
+ * every fourth instruction is one, so each sched is followed by three regular instructions.
+ */
+constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
+    constexpr u32 sched_period = 4;
+    const u32 relative_offset = offset - main_offset;
+
+    return relative_offset % sched_period == 0;
+}
+
+} // namespace
+
+void ShaderIR::Decode() {
+    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
+
+    // Scanning collects every branch target and reports how the program terminates.
+    std::set<u32> labels;
+    if (Scan(main_offset, MAX_PROGRAM_LENGTH, labels) != ExitMethod::AlwaysEnd) {
+        UNREACHABLE_MSG("Program does not always end");
+    }
+
+    // Without branch targets the whole program is decoded as a single block.
+    if (labels.empty()) {
+        basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
+        return;
+    }
+    // Otherwise each label, and the entry point, opens a block that runs up to the next label.
+    labels.insert(main_offset);
+    for (const u32 block_begin : labels) {
+        const auto successor = labels.lower_bound(block_begin + 1);
+        const bool is_last = successor == labels.end();
+        basic_blocks.insert(
+            {block_begin, DecodeRange(block_begin, is_last ? MAX_PROGRAM_LENGTH : *successor)});
+    }
+}
+
+ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
+    const auto [iter, inserted] =
+        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
+    ExitMethod& exit_method = iter->second;
+    if (!inserted) // This range already has an entry (possibly still Undetermined); reuse it.
+        return exit_method;
+    // Walk the range; EXIT and BRA settle the result and return from inside the loop.
+    for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
+        coverage_begin = std::min(coverage_begin, offset);
+        coverage_end = std::max(coverage_end, offset + 1);
+
+        const Instruction instr = {program_code[offset]};
+        const auto opcode = OpCode::Decode(instr);
+        if (!opcode)
+            continue;
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::EXIT: {
+            // The EXIT instruction can be predicated, which means that the shader can conditionally
+            // end on this instruction. We have to consider the case where the condition is not met
+            // and check the exit method of that other basic block.
+            using Tegra::Shader::Pred;
+            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
+                return exit_method = ExitMethod::AlwaysEnd;
+            } else {
+                const ExitMethod not_met = Scan(offset + 1, end, labels);
+                return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
+            }
+        }
+        case OpCode::Id::BRA: {
+            const u32 target = offset + instr.bra.GetBranchTarget();
+            labels.insert(target);
+            const ExitMethod no_jmp = Scan(offset + 1, end, labels);
+            const ExitMethod jmp = Scan(target, end, labels);
+            return exit_method = ParallelExit(no_jmp, jmp);
+        }
+        case OpCode::Id::SSY:
+        case OpCode::Id::PBK: {
+            // SSY and PBK use an encoding similar to BRA; only their target label is recorded.
+            UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                                 "Constant buffer branching is not supported");
+            const u32 target = offset + instr.bra.GetBranchTarget();
+            labels.insert(target);
+            // Continue scanning for an exit method.
+            break;
+        }
+        }
+    }
+    return exit_method = ExitMethod::AlwaysReturn;
+}
+
+NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
+    // An end that lies before begin is treated as "decode up to MAX_PROGRAM_LENGTH".
+    const u32 limit = begin > end ? MAX_PROGRAM_LENGTH : end;
+    NodeBlock block;
+    for (u32 pc = begin; pc < limit; pc = DecodeInstr(block, pc)) {}
+    return block;
+}
+
+u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
+    // Ignore sched instructions when generating code.
+    if (IsSchedInstruction(pc, main_offset)) {
+        return pc + 1;
+    }
+
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Decoding failure
+    if (!opcode) {
+        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
+        return pc + 1;
+    }
+    // Start the entry with a comment node holding the pc, the opcode name and the raw encoding.
+    bb.push_back(
+        Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
+
+    using Tegra::Shader::Pred;
+    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
+                         "NeverExecute predicate not implemented");
+    // Decoder per opcode type; types missing from this table are handled by DecodeOther.
+    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
+        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
+        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
+        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
+        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
+        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
+        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
+        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
+        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
+        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
+        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
+        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
+        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
+        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
+        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
+        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
+        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
+        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
+        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
+        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
+        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
+        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
+        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
+        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
+        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
+        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
+    };
+    // Decode into a temporary block so that the nodes can be wrapped in a conditional below.
+    std::vector<Node> tmp_block;
+    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
+        pc = (this->*decoder->second)(tmp_block, pc);
+    } else {
+        pc = DecodeOther(tmp_block, pc);
+    }
+
+    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
+    // executed.
+    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
+    const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+    // Predicated instructions become one conditional node; the rest are appended node by node.
+    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
+        const Node conditional =
+            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
+        global_code.push_back(conditional);
+        bb.push_back(conditional);
+    } else {
+        for (auto& node : tmp_block) {
+            global_code.push_back(node);
+            bb.push_back(node);
+        }
+    }
+    // Continue with the instruction that follows the pc returned by the decoder.
+    return pc + 1;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
new file mode 100644
index 000000000..3190e2d7c
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -0,0 +1,155 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::SubOp;
+
+u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Decodes the arithmetic instruction at pc into bb; pc itself is returned unchanged.
+    Node op_a = GetRegister(instr.gpr8);
+    // Operand A is gpr8 (above); operand B is an immediate, gpr20 or a constant buffer entry.
+    Node op_b = [&]() -> Node {
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::MOV_C:
+    case OpCode::Id::MOV_R: {
+        // MOV has neither 'abs' nor 'neg' bits.
+        SetRegister(bb, instr.gpr0, op_b);
+        break;
+    }
+    case OpCode::Id::FMUL_C:
+    case OpCode::Id::FMUL_R:
+    case OpCode::Id::FMUL_IMM: {
+        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
+                             instr.fmul.tab5cb8_2.Value());
+        UNIMPLEMENTED_IF_MSG(
+            instr.fmul.tab5c68_0 != 1, "FMUL tab5c68_0({}) is not implemented",
+            instr.fmul.tab5c68_0.Value()); // SMO typically sends 1 here, seemingly the default
+
+        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
+
+        // TODO(Rodrigo): Should precise be used when there's a postfactor?
+        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+
+        if (instr.fmul.postfactor != 0) {
+            auto postfactor = static_cast<s32>(instr.fmul.postfactor);
+
+            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
+            // logic.
+            if (postfactor >= 4) {
+                postfactor = 7 - postfactor;
+            } else {
+                postfactor = 0 - postfactor;
+            }
+            // A positive exponent scales the product up by 2^n, any other one divides by 2^-n.
+            if (postfactor > 0) {
+                value = Operation(OperationCode::FMul, NO_PRECISE, value,
+                                  Immediate(static_cast<f32>(1 << postfactor)));
+            } else {
+                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
+                                  Immediate(static_cast<f32>(1 << -postfactor)));
+            }
+        }
+
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FADD_C:
+    case OpCode::Id::FADD_R:
+    case OpCode::Id::FADD_IMM: {
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
+        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::MUFU: {
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        // MUFU is a unary operation on operand A; the sub-op selects the function to apply.
+        Node value = [&]() {
+            switch (instr.sub_op) {
+            case SubOp::Cos:
+                return Operation(OperationCode::FCos, PRECISE, op_a);
+            case SubOp::Sin:
+                return Operation(OperationCode::FSin, PRECISE, op_a);
+            case SubOp::Ex2:
+                return Operation(OperationCode::FExp2, PRECISE, op_a);
+            case SubOp::Lg2:
+                return Operation(OperationCode::FLog2, PRECISE, op_a);
+            case SubOp::Rcp:
+                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
+            case SubOp::Rsq:
+                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
+            case SubOp::Sqrt:
+                return Operation(OperationCode::FSqrt, PRECISE, op_a);
+            default:
+                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
+                                  static_cast<unsigned>(instr.sub_op.Value()));
+                return Immediate(0);
+            }
+        }();
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FMNMX_C:
+    case OpCode::Id::FMNMX_R:
+    case OpCode::Id::FMNMX_IMM: {
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
+        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
+        // Both results are built; the predicate selects the minimum when true, else the maximum.
+        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
+        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::RRO_C:
+    case OpCode::Id::RRO_R:
+    case OpCode::Id::RRO_IMM: {
+        // Currently RRO is only implemented as a register move.
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+        SetRegister(bb, instr.gpr0, op_b);
+        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
new file mode 100644
index 000000000..baee89107
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -0,0 +1,70 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { // Decodes HADD2_C/R and HMUL2_C/R.
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // The ftz flag is only rejected for HADD2; saturation is rejected for every opcode.
+    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
+        opcode->get().GetId() == OpCode::Id::HADD2_R) {
+        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
+    }
+    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
+    // The negate_a bit is ignored for HMUL2_R and the negate_b bit is ignored for HMUL2_C.
+    const bool negate_a =
+        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
+    const bool negate_b =
+        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
+
+    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
+
+    // NOTE(review): type_a is not applied to op_a here; it is only forwarded through `meta` below.
+
+    Node op_b = [&]() {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HADD2_C:
+        case OpCode::Id::HMUL2_C:
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        case OpCode::Id::HADD2_R:
+        case OpCode::Id::HMUL2_R:
+            return GetRegister(instr.gpr20);
+        default:
+            UNREACHABLE();
+            return Immediate(0); // placeholder operand returned after UNREACHABLE()
+        }
+    }();
+    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+    // NOTE(review): meta reads type_a through alu_half_imm but type_b through alu_half; confirm.
+    Node value = [&]() {
+        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HADD2_C:
+        case OpCode::Id::HADD2_R:
+            return Operation(OperationCode::HAdd, meta, op_a, op_b);
+        case OpCode::Id::HMUL2_C:
+        case OpCode::Id::HMUL2_R:
+            return Operation(OperationCode::HMul, meta, op_a, op_b);
+        default:
+            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
+            return Immediate(0);
+        }
+    }();
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
+    // HalfMerge receives the current gpr0 value; its result is then written back to gpr0.
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unchanged
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
new file mode 100644
index 000000000..c2164ba50
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -0,0 +1,51 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { // HADD2_IMM / HMUL2_IMM
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // HADD2_IMM rejects the ftz flag; any other opcode rejects a non-None precision instead.
+    if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
+        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
+    } else {
+        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
+    }
+    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
+                         "Half float immediate saturation not implemented");
+    // Operand A is gpr8 with abs/negate applied; operand B is unpacked from the instruction.
+    Node op_a = GetRegister(instr.gpr8);
+    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
+
+    const Node op_b = UnpackHalfImmediate(instr, true);
+    // Only type_a is placed in the meta; no type is supplied for the immediate operand.
+    Node value = [&]() {
+        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HADD2_IMM:
+            return Operation(OperationCode::HAdd, meta, op_a, op_b);
+        case OpCode::Id::HMUL2_IMM:
+            return Operation(OperationCode::HMul, meta, op_a, op_b);
+        default:
+            UNREACHABLE();
+            return Immediate(0); // placeholder operand returned after UNREACHABLE()
+        }
+    }();
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
+    // HalfMerge receives the current gpr0 value; its result is then written back to gpr0.
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unchanged
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
new file mode 100644
index 000000000..0d139c0d2
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -0,0 +1,52 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { // 32-bit immediate ALU ops
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Handles MOV32_IMM, FMUL32_IMM and FADD32I; anything else is reported as unimplemented.
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::MOV32_IMM: {
+        SetRegister(bb, instr.gpr0, GetImmediate32(instr)); // gpr0 = imm32
+        break;
+    }
+    case OpCode::Id::FMUL32_IMM: {
+        Node value =
+            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
+        value = GetSaturatedFloat(value, instr.fmul32.saturate);
+        // Flags are derived from the value after saturation was applied.
+        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FADD32I: {
+        const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
+                                                instr.fadd32i.negate_a);
+        const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
+                                                instr.fadd32i.negate_b);
+        // Both operands, including the immediate, have their own abs/negate modifiers.
+        const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
+        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
+                          opcode->get().GetName());
+    }
+
+    return pc; // pc is returned unchanged
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
new file mode 100644
index 000000000..9fd4b273e
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -0,0 +1,287 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::IAdd3Height;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::Register;
+
+u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { // Decodes integer ALU instructions.
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // op_a is gpr8; op_b is an immediate, gpr20 or a const buffer value depending on the encoding.
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = [&]() {
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::IADD_C:
+    case OpCode::Id::IADD_R:
+    case OpCode::Id::IADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
+
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
+
+        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::IADD3_C:
+    case OpCode::Id::IADD3_R:
+    case OpCode::Id::IADD3_IMM: {
+        Node op_c = GetRegister(instr.gpr39);
+        // Selects the whole value or one of its two 16-bit halves, depending on `height`.
+        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
+            switch (height) {
+            case IAdd3Height::None:
+                return value;
+            case IAdd3Height::LowerHalfWord:
+                return BitfieldExtract(value, 0, 16);
+            case IAdd3Height::UpperHalfWord:
+                return BitfieldExtract(value, 16, 16);
+            default:
+                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
+                return Immediate(0);
+            }
+        };
+        // Half-word selection is only applied for the register form (IADD3_R).
+        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
+            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
+            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
+            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
+        }
+
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
+        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
+        // Only IADD3_R may shift (a + b) by 16 before adding c; other forms add all three directly.
+        const Node value = [&]() {
+            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
+                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
+            }
+            const Node shifted = [&]() {
+                switch (instr.iadd3.mode) {
+                case Tegra::Shader::IAdd3Mode::RightShift:
+                    // TODO(tech4me): According to
+                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
+                    // The addition between op_a and op_b should be done in uint33, more
+                    // investigation required
+                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
+                                     Immediate(16));
+                case Tegra::Shader::IAdd3Mode::LeftShift:
+                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
+                                     Immediate(16));
+                default:
+                    return add_ab; // any other mode leaves the partial sum unshifted
+                }
+            }();
+            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
+        }();
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::ISCADD_C:
+    case OpCode::Id::ISCADD_R:
+    case OpCode::Id::ISCADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in ISCADD is not implemented");
+        // NOTE(review): cc generation is reported as unimplemented, yet flags are still set below.
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
+        // value = (op_a << shift_amount) + op_b
+        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
+        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
+        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::POPC_C:
+    case OpCode::Id::POPC_R:
+    case OpCode::Id::POPC_IMM: {
+        if (instr.popc.invert) { // optionally count the bits of ~op_b instead
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        }
+        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::SEL_C:
+    case OpCode::Id::SEL_R:
+    case OpCode::Id::SEL_IMM: {
+        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
+        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::LOP_C:
+    case OpCode::Id::LOP_R:
+    case OpCode::Id::LOP_IMM: {
+        if (instr.alu.lop.invert_a)
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+        if (instr.alu.lop.invert_b)
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        // The register, flag and predicate writes are all performed by WriteLogicOperation.
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
+                            instr.generates_cc);
+        break;
+    }
+    case OpCode::Id::LOP3_C:
+    case OpCode::Id::LOP3_R:
+    case OpCode::Id::LOP3_IMM: {
+        const Node op_c = GetRegister(instr.gpr39);
+        const Node lut = [&]() { // the LUT immediate is read differently for LOP3_R
+            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
+                return Immediate(instr.alu.lop3.GetImmLut28());
+            } else {
+                return Immediate(instr.alu.lop3.GetImmLut48());
+            }
+        }();
+
+        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
+        break;
+    }
+    case OpCode::Id::IMNMX_C:
+    case OpCode::Id::IMNMX_R:
+    case OpCode::Id::IMNMX_IMM: {
+        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
+
+        const bool is_signed = instr.imnmx.is_signed;
+        // Both min and max are built; the predicate selects min when true and max otherwise.
+        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
+        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
+        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::LEA_R2:
+    case OpCode::Id::LEA_R1:
+    case OpCode::Id::LEA_IMM:
+    case OpCode::Id::LEA_RZ:
+    case OpCode::Id::LEA_HI: {
+        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { // shadows outer ops
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::LEA_R2: {
+                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
+                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
+            }
+
+            case OpCode::Id::LEA_R1: {
+                const bool neg = instr.lea.r1.neg != 0;
+                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        GetRegister(instr.gpr20),
+                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
+            }
+
+            case OpCode::Id::LEA_IMM: {
+                const bool neg = instr.lea.imm.neg != 0;
+                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
+                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
+            }
+
+            case OpCode::Id::LEA_RZ: {
+                const bool neg = instr.lea.rz.neg != 0;
+                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
+                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
+            }
+
+            case OpCode::Id::LEA_HI:
+            default:
+                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
+                // Fallback operands returned after reporting the unimplemented variant.
+                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
+            }
+        }();
+
+        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
+                             "Unhandled LEA Predicate");
+        // value = op_a + op_b * (1 << op_c)
+        const Node shifted_c =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
+        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
+        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
+
+        SetRegister(bb, instr.gpr0, value);
+
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
+    }
+
+    return pc; // pc is returned unchanged
+}
+
+void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
+                                    Node imm_lut, bool sets_cc) { // Emits LOP3 as a per-bit LUT.
+    constexpr u32 lop_iterations = 32; // one iteration per result bit
+    const Node one = Immediate(1);
+    const Node two = Immediate(2);
+    // For each bit i: index = (a_i << 2) | (b_i << 1) | c_i; result_i = (imm_lut >> index) & 1.
+    Node value{};
+    for (u32 i = 0; i < lop_iterations; ++i) {
+        const Node shift_amount = Immediate(i);
+        // Bit i of op_c becomes index bit 0.
+        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
+        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
+        // Bit i of op_b becomes index bit 1.
+        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
+        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
+        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
+        // Bit i of op_a becomes index bit 2.
+        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
+        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
+        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
+        // The fields occupy disjoint bits, so merge them with OR (an AND would always yield 0).
+        const Node pack_01 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_0, pack_1);
+        const Node pack_012 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_01, pack_2);
+        // Pick the LUT bit addressed by the 3-bit index.
+        const Node shifted_bit =
+            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
+        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
+        // Move the selected bit back to position i of the result.
+        const Node right =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
+        // The first iteration seeds `value`; later iterations OR their bit into it.
+        if (i > 0) {
+            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
+        } else {
+            value = right;
+        }
+    }
+
+    SetInternalFlagsFromInteger(bb, value, sets_cc);
+    SetRegister(bb, dest, value);
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
new file mode 100644
index 000000000..3ed5ccc5a
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -0,0 +1,96 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::LogicOperation;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::PredicateResultMode;
+using Tegra::Shader::Register;
+
+u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { // IADD32I / LOP32I
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // op_a is gpr8; op_b is the imm20_32 field cast to s32.
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::IADD32I: {
+        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
+        // Only operand A has a negate modifier in this encoding.
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+
+        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+
+        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::LOP32I: {
+        if (instr.alu.lop32i.invert_a)
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+
+        if (instr.alu.lop32i.invert_b)
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        // No predicate output is requested: mode None with the unused predicate index.
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+                            PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
+                          opcode->get().GetName());
+    }
+
+    return pc; // pc is returned unchanged
+}
+
+void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
+                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate,
+                                   bool sets_cc) { // Writes dest = op_a <logic_op> op_b.
+    const Node result = [&]() {
+        switch (logic_op) {
+        case LogicOperation::And:
+            return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+        case LogicOperation::Or:
+            return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+        case LogicOperation::Xor:
+            return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+        case LogicOperation::PassB:
+            return op_b; // op_a is ignored
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
+            return Immediate(0);
+        }
+    }();
+    // Flags and the destination register are written regardless of the predicate mode.
+    SetInternalFlagsFromInteger(bb, result, sets_cc);
+    SetRegister(bb, dest, result);
+
+    // Write the predicate value depending on the predicate mode.
+    switch (predicate_mode) {
+    case PredicateResultMode::None:
+        // Do nothing.
+        return;
+    case PredicateResultMode::NotZero: {
+        // Set the predicate to true if the result is not zero.
+        const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
+        SetPredicate(bb, static_cast<u64>(predicate), compare);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
+                          static_cast<u32>(predicate_mode));
+    }
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
new file mode 100644
index 000000000..6a95dc928
--- /dev/null
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -0,0 +1,49 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { // Decodes BFE; only BFE_IMM is handled.
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF(instr.bfe.negate_b);
+    // The source is gpr8, optionally negated; the last argument (signedness) is passed as false.
+    Node op_a = GetRegister(instr.gpr8);
+    op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::BFE_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in BFE is not implemented");
+        // result = (op_a << left_shift) >> (left_shift + shift_position), logical right shift.
+        const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
+        const Node outer_shift_imm =
+            Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
+
+        const Node inner_shift =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
+        const Node outer_shift =
+            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
+
+        SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, outer_shift);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
+    }
+
+    return pc; // pc is returned unchanged
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
new file mode 100644
index 000000000..601d66f1f
--- /dev/null
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -0,0 +1,41 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { // Decodes BFI; only BFI_IMM_R is handled.
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // base is gpr39; packed_shift is an immediate from which offset and bit count are extracted.
+    const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::BFI_IMM_R:
+            return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
+        default:
+            UNREACHABLE();
+            return {Immediate(0), Immediate(0)};
+        }
+    }();
+    const Node insert = GetRegister(instr.gpr8);
+    const Node offset = BitfieldExtract(packed_shift, 0, 8); // presumably bits 0-7; confirm
+    const Node bits = BitfieldExtract(packed_shift, 8, 8); // presumably bits 8-15; confirm
+    // Insert `bits` bits of `insert` into `base` at `offset` (semantics of UBitfieldInsert assumed).
+    const Node value =
+        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
+
+    SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unchanged
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
new file mode 100644
index 000000000..55a6fbbf2
--- /dev/null
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -0,0 +1,149 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+
+u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { // Decodes I2I, I2F, F2F and F2I.
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Handles I2I_R, I2F_R/C, F2F_R/C and F2I_R/C; anything else is reported as unimplemented.
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::I2I_R: {
+        UNIMPLEMENTED_IF(instr.conversion.selector);
+
+        const bool input_signed = instr.conversion.is_input_signed;
+        const bool output_signed = instr.conversion.is_output_signed;
+
+        Node value = GetRegister(instr.gpr20);
+        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
+        // abs/negate use the input signedness; a cast is added only if the signedness changes.
+        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
+                                        input_signed);
+        if (input_signed != output_signed) {
+            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
+        }
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::I2F_R:
+    case OpCode::Id::I2F_C: {
+        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.selector);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in I2F is not implemented");
+        // abs is applied to the integer input; negation is applied to the float result.
+        Node value = [&]() {
+            if (instr.is_b_gpr) {
+                return GetRegister(instr.gpr20);
+            } else {
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            }
+        }();
+        const bool input_signed = instr.conversion.is_input_signed;
+        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
+        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
+        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
+        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::F2F_R:
+    case OpCode::Id::F2F_C: {
+        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in F2F is not implemented");
+        // The source is gpr20 for the register form, otherwise a const buffer value.
+        Node value = [&]() {
+            if (instr.is_b_gpr) {
+                return GetRegister(instr.gpr20);
+            } else {
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            }
+        }();
+
+        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
+        // Apply the rounding mode selected by the instruction; None leaves the value untouched.
+        value = [&]() {
+            switch (instr.conversion.f2f.rounding) {
+            case Tegra::Shader::F2fRoundingOp::None:
+                return value;
+            case Tegra::Shader::F2fRoundingOp::Round:
+                return Operation(OperationCode::FRoundEven, PRECISE, value);
+            case Tegra::Shader::F2fRoundingOp::Floor:
+                return Operation(OperationCode::FFloor, PRECISE, value);
+            case Tegra::Shader::F2fRoundingOp::Ceil:
+                return Operation(OperationCode::FCeil, PRECISE, value);
+            case Tegra::Shader::F2fRoundingOp::Trunc:
+                return Operation(OperationCode::FTrunc, PRECISE, value);
+            }
+            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+            return Immediate(0);
+        }();
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::F2I_R:
+    case OpCode::Id::F2I_C: {
+        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in F2I is not implemented");
+        Node value = [&]() {
+            if (instr.is_b_gpr) {
+                return GetRegister(instr.gpr20);
+            } else {
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            }
+        }();
+        // Order: abs/negate, round in float, cast to integer, then resize to dest_size.
+        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
+
+        value = [&]() {
+            switch (instr.conversion.f2i.rounding) {
+            case Tegra::Shader::F2iRoundingOp::RoundEven:
+                return Operation(OperationCode::FRoundEven, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Floor:
+                return Operation(OperationCode::FFloor, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Ceil:
+                return Operation(OperationCode::FCeil, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Trunc:
+                return Operation(OperationCode::FTrunc, PRECISE, value);
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
+                                  static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+                return Immediate(0);
+            }
+        }();
+        const bool is_signed = instr.conversion.is_output_signed;
+        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
+        value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);
+        // No condition-code flags are written on this path.
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
+    }
+
+    return pc; // pc is returned unchanged
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/video_core/shader/decode/decode_integer_set.cpp
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
new file mode 100644
index 000000000..0559cc8de
--- /dev/null
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -0,0 +1,59 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { // Decodes the FFMA variant found at pc
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
+    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
+                         instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
+    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
+                         instr.ffma.tab5980_1.Value());
+
+    const Node op_a = GetRegister(instr.gpr8); // op_a is a register in every variant
+
+    auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { // sources depend on the opcode id
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::FFMA_CR: {
+            return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                    GetRegister(instr.gpr39)};
+        }
+        case OpCode::Id::FFMA_RR:
+            return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+        case OpCode::Id::FFMA_RC: { // same sources as FFMA_CR, in the opposite order
+            return {GetRegister(instr.gpr39),
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+        }
+        case OpCode::Id::FFMA_IMM:
+            return {GetImmediate19(instr), GetRegister(instr.gpr39)};
+        default:
+            UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
+            return {Immediate(0), Immediate(0)}; // placeholder operands
+        }
+    }();
+
+    op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); // negate only, no abs
+    op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); // negate only, no abs
+
+    Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
+    value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+    SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
new file mode 100644
index 000000000..1bd6755dd
--- /dev/null
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -0,0 +1,58 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { // Decodes the fset instruction at pc
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
+                                            instr.fset.neg_a != 0);
+
+    Node op_b = [&]() { // second operand: immediate, register or constant buffer
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
+
+    // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
+    // condition is true, and to 0 otherwise.
+    const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
+    const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
+
+    const Node predicate = Operation(combiner, first_pred, second_pred);
+
+    const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
+    const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
+    const Node value =
+        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
+
+    if (instr.fset.bf) { // bf selects the float flag helper, otherwise the integer one
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+    } else {
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+    }
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
new file mode 100644
index 000000000..9285b8d05
--- /dev/null
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -0,0 +1,56 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { // Decodes fsetp at pc
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
+                                            instr.fsetp.neg_a != 0);
+    Node op_b = [&]() { // second operand: immediate, register or constant buffer
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+    op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); // NOTE(review): b never negated
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+    const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
+    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
+    const Node value = Operation(combiner, predicate, second_pred);
+
+    // Set the primary predicate to the result of Predicate OP SecondPredicate
+    SetPredicate(bb, instr.fsetp.pred3, value);
+
+    if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+        // if enabled
+        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
+        const Node second_value = Operation(combiner, negated_pred, second_pred);
+        SetPredicate(bb, instr.fsetp.pred0, second_value);
+    }
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
new file mode 100644
index 000000000..748368555
--- /dev/null
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -0,0 +1,67 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { // Decodes HSET2 at pc into bb
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
+
+    // The operands are read as raw registers here; instr.hset2.type_a and instr.hset2.type_b
+    // are not applied at this point but forwarded to the comparison through meta below.
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = [&]() { // only the register form (HSET2_R) is handled
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HSET2_R:
+            return GetRegister(instr.gpr20);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+
+    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
+    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
+
+    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
+    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
+
+    // HSET2 operates on each half float in the pack.
+    std::array<Node, 2> values;
+    for (u32 i = 0; i < 2; ++i) {
+        const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; // half 1.0 or all bits set
+        const Node true_value = Immediate(raw_value << (i * 16)); // shifted into 16-bit slot i
+        const Node false_value = Immediate(0);
+
+        const Node comparison =
+            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+        const Node predicate = Operation(combiner, comparison, second_pred);
+
+        values[i] =
+            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+    }
+
+    const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
new file mode 100644
index 000000000..e68512692
--- /dev/null
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -0,0 +1,62 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { // Decodes HSETP2 at pc into bb
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
+
+    Node op_a = GetRegister(instr.gpr8);
+    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
+
+    const Node op_b = [&]() { // operand b uses its own abs/negate modifiers, not operand a's
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HSETP2_R:
+            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_b,
+                                        instr.hsetp2.negate_b);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
+    const OperationCode pair_combiner =
+        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
+
+    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
+    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
+    const Node first_pred = Operation(pair_combiner, comparison); // h_and ? All2 : Any2
+
+    // Set the primary predicate to the result of Predicate OP SecondPredicate
+    const Node value = Operation(combiner, first_pred, second_pred);
+    SetPredicate(bb, instr.hsetp2.pred3, value);
+
+    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
+        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
+        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
+    }
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
new file mode 100644
index 000000000..7a07c5ec6
--- /dev/null
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -0,0 +1,77 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::HalfPrecision;
+using Tegra::Shader::HalfType;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { // Decodes the HFMA2 variant found at pc
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { // RR reads its fields from hfma2.rr
+        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
+    } else {
+        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
+    }
+
+    constexpr auto identity = HalfType::H0_H1;
+
+    const HalfType type_a = instr.hfma2.type_a;
+    const Node op_a = GetRegister(instr.gpr8);
+
+    bool neg_b{}, neg_c{}; // remain false unless the matched case below assigns them
+    auto [saturate, type_b, op_b, type_c,
+          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HFMA2_CR:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_b,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_RC: // same sources as HFMA2_CR, in the opposite order
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
+                    instr.hfma2.type_b,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+        case OpCode::Id::HFMA2_RR:
+            neg_b = instr.hfma2.rr.negate_b;
+            neg_c = instr.hfma2.rr.negate_c;
+            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
+                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_IMM_R: // neg_b is left false: the immediate is never negated
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        default: // NOTE(review): placeholders are returned without any UNIMPLEMENTED report
+            return {false, identity, Immediate(0), identity, Immediate(0)};
+        }
+    }();
+    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
+
+    op_b = GetOperandAbsNegHalf(op_b, false, neg_b); // negate only, no abs
+    op_c = GetOperandAbsNegHalf(op_c, false, neg_c); // negate only, no abs
+
+    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
+    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); // gpr0 is an input
+
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
new file mode 100644
index 000000000..a3bf17eba
--- /dev/null
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -0,0 +1,50 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { // Decodes the iset instruction at pc
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    const Node op_a = GetRegister(instr.gpr8);
+    const Node op_b = [&]() { // second operand: immediate, register or constant buffer
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
+    // is true, and to 0 otherwise.
+    const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
+    const Node first_pred =
+        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
+
+    const Node predicate = Operation(combiner, first_pred, second_pred);
+
+    const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
+    const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
+    const Node value =
+        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
+
+    SetRegister(bb, instr.gpr0, value); // no internal flags are updated in this decoder
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
new file mode 100644
index 000000000..aad836d24
--- /dev/null
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -0,0 +1,53 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { // Decodes isetp at pc
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    const Node op_a = GetRegister(instr.gpr8);
+
+    const Node op_b = [&]() { // second operand: immediate, register or constant buffer
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+    const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
+    const Node predicate =
+        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
+
+    // Set the primary predicate to the result of Predicate OP SecondPredicate
+    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
+    const Node value = Operation(combiner, predicate, second_pred);
+    SetPredicate(bb, instr.isetp.pred3, value);
+
+    if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
+        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
+        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
+    }
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
new file mode 100644
index 000000000..ea3c71eed
--- /dev/null
+++ b/src/video_core/shader/decode/memory.cpp
@@ -0,0 +1,239 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <vector>
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Attribute;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+
+u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { // Decodes LD_A, LD_C, LD_L, LDG, ST_A, ST_L
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::LD_A: {
+        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
+                             "Indirect attribute loads are not supported");
+        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
+                             "Unaligned attribute loads are not supported");
+
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
+                                          Tegra::Shader::IpaSampleMode::Default};
+
+        u64 next_element = instr.attribute.fmt20.element;
+        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+        const auto LoadNextElement = [&](u32 reg_offset) {
+            const Node buffer = GetRegister(instr.gpr39);
+            const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
+                                                     next_element, input_mode, buffer);
+
+            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
+
+            // Load the next attribute element into the following register. If the element
+            // to load goes beyond the vec4 size, load the first element of the next
+            // attribute.
+            next_element = (next_element + 1) % 4;
+            next_index = next_index + (next_element == 0 ? 1 : 0);
+        };
+
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            LoadNextElement(reg_offset);
+        }
+        break;
+    }
+    case OpCode::Id::LD_C: {
+        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
+
+        Node index = GetRegister(instr.gpr8);
+
+        const Node op_a =
+            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
+
+        switch (instr.ld_c.type.Value()) {
+        case Tegra::Shader::UniformType::Single:
+            SetRegister(bb, instr.gpr0, op_a);
+            break;
+
+        case Tegra::Shader::UniformType::Double: {
+            const Node op_b =
+                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
+
+            SetTemporal(bb, 0, op_a); // both words are staged in temporals first
+            SetTemporal(bb, 1, op_b);
+            SetRegister(bb, instr.gpr0, GetTemporal(0));
+            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
+        }
+        break;
+    }
+    case OpCode::Id::LD_L: {
+        UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
+                             static_cast<u32>(instr.ld_l.unknown.Value()));
+
+        const auto GetLmem = [&](s32 offset) { // reads gpr8 + smem_imm + offset
+            ASSERT(offset % 4 == 0);
+            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
+                                           immediate_offset);
+            return GetLocalMemory(address);
+        };
+
+        switch (instr.ldst_sl.type.Value()) {
+        case Tegra::Shader::StoreType::Bits32:
+        case Tegra::Shader::StoreType::Bits64:
+        case Tegra::Shader::StoreType::Bits128: {
+            const u32 count = [&]() { // number of 32-bit words to load
+                switch (instr.ldst_sl.type.Value()) {
+                case Tegra::Shader::StoreType::Bits32:
+                    return 1;
+                case Tegra::Shader::StoreType::Bits64:
+                    return 2;
+                case Tegra::Shader::StoreType::Bits128:
+                    return 4;
+                default:
+                    UNREACHABLE();
+                    return 0;
+                }
+            }();
+            for (u32 i = 0; i < count; ++i)
+                SetTemporal(bb, i, GetLmem(i * 4));
+            for (u32 i = 0; i < count; ++i)
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
+                              static_cast<u32>(instr.ldst_sl.type.Value()));
+        }
+        break;
+    }
+    case OpCode::Id::LDG: {
+        const u32 count = [&]() { // number of 32-bit words to load
+            switch (instr.ldg.type) {
+            case Tegra::Shader::UniformType::Single:
+                return 1;
+            case Tegra::Shader::UniformType::Double:
+                return 2;
+            case Tegra::Shader::UniformType::Quad:
+            case Tegra::Shader::UniformType::UnsignedQuad:
+                return 4;
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented LDG size!");
+                return 1;
+            }
+        }();
+
+        const Node addr_register = GetRegister(instr.gpr8);
+        const Node base_address =
+            TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
+        const auto cbuf = std::get_if<CbufNode>(base_address);
+        ASSERT(cbuf != nullptr); // the tracked base must be a constant buffer node
+        const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+        ASSERT(cbuf_offset_imm != nullptr); // and its offset must be an immediate
+        const auto cbuf_offset = cbuf_offset_imm->GetValue();
+
+        bb.push_back(Comment(
+            fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
+
+        const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
+        used_global_memory_bases.insert(descriptor);
+
+        const Node immediate_offset =
+            Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
+        const Node base_real_address =
+            Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
+
+        for (u32 i = 0; i < count; ++i) {
+            const Node it_offset = Immediate(i * 4);
+            const Node real_address =
+                Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
+            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+
+            SetTemporal(bb, i, gmem);
+        }
+        for (u32 i = 0; i < count; ++i) {
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+        }
+        break;
+    }
+    case OpCode::Id::ST_A: {
+        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
+                             "Indirect attribute stores are not supported");
+        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
+                             "Unaligned attribute stores are not supported");
+
+        u64 next_element = instr.attribute.fmt20.element;
+        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+        const auto StoreNextElement = [&](u32 reg_offset) {
+            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
+                                                 next_element, GetRegister(instr.gpr39));
+            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
+
+            bb.push_back(Operation(OperationCode::Assign, dest, src));
+
+            // Store the next register into the following attribute element. If the element
+            // to store goes beyond the vec4 size, continue with the first element of the
+            // next attribute.
+            next_element = (next_element + 1) % 4;
+            next_index = next_index + (next_element == 0 ? 1 : 0);
+        };
+
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            StoreNextElement(reg_offset);
+        }
+
+        break;
+    }
+    case OpCode::Id::ST_L: {
+        UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
+                             static_cast<u32>(instr.st_l.unknown.Value()));
+
+        const auto GetLmemAddr = [&](s32 offset) { // address node: gpr8 + smem_imm + offset
+            ASSERT(offset % 4 == 0);
+            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
+        };
+
+        switch (instr.ldst_sl.type.Value()) { // wider stores fall through to the narrower ones
+        case Tegra::Shader::StoreType::Bits128:
+            SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
+            SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2)); // fallthrough
+        case Tegra::Shader::StoreType::Bits64:
+            SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1)); // fallthrough
+        case Tegra::Shader::StoreType::Bits32:
+            SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
+            break;
+        default:
+            UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
+                              static_cast<u32>(instr.ldst_sl.type.Value()));
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
+    }
+
+    return pc; // pc is returned unmodified
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
new file mode 100644
index 000000000..d750a2936
--- /dev/null
+++ b/src/video_core/shader/decode/other.cpp
@@ -0,0 +1,189 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+
+// Decodes the flow-control or miscellaneous instruction at pc, appending its IR nodes to bb.
+// Returns pc; an unconditional EXIT sets it to MAX_PROGRAM_LENGTH - 1 to end processing.
+u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::EXIT: {
+        const ConditionCode cond_code = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cond_code != ConditionCode::T, "EXIT condition code used: {}",
+                             static_cast<u32>(cond_code));
+
+        switch (instr.flow.cond) {
+        case Tegra::Shader::FlowCondition::Always:
+            bb.push_back(Operation(OperationCode::Exit));
+            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
+                // An unconditional exit ends processing right here. A conditional one may not
+                // be taken, so in that case processing carries on with the next
+                // instruction.
+                pc = MAX_PROGRAM_LENGTH - 1;
+            }
+            break;
+
+        case Tegra::Shader::FlowCondition::Fcsm_Tr:
+            // TODO(bunnei): What is this used for? If we assume this condition is not
+            // satisfied, dual vertex shaders in Farming Simulator make more sense
+            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
+            break;
+
+        default:
+            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
+                              static_cast<u32>(instr.flow.cond.Value()));
+        }
+        break;
+    }
+    case OpCode::Id::KIL: {
+        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
+
+        const ConditionCode cond_code = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cond_code != ConditionCode::T, "KIL condition code used: {}",
+                             static_cast<u32>(cond_code));
+
+        bb.push_back(Operation(OperationCode::Discard));
+        break;
+    }
+    case OpCode::Id::MOV_SYS: {
+        switch (instr.sys20) {
+        case Tegra::Shader::SystemVariable::InvocationInfo:
+            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+            SetRegister(bb, instr.gpr0, Immediate(0u));
+            break;
+        case Tegra::Shader::SystemVariable::Ydirection:
+            // Config pack's third value is Y_NEGATE's state.
+            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
+        }
+        break;
+    }
+    case OpCode::Id::BRA: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "BRA with constant buffers are not implemented");
+
+        const u32 branch_target = pc + instr.bra.GetBranchTarget();
+        const Node jump = Operation(OperationCode::Branch, Immediate(branch_target));
+
+        const ConditionCode cond_code = instr.flow_condition_code;
+        if (cond_code == ConditionCode::T) {
+            bb.push_back(jump);
+        } else {
+            bb.push_back(Conditional(GetConditionCode(cond_code), {jump}));
+        }
+        break;
+    }
+    case OpCode::Id::SSY: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "Constant buffer flow is not supported");
+
+        // SSY tells the GPU where divergent execution paths re-converge: it records the target
+        // that a later SYNC instruction will jump to. Its encoding is similar to the BRA
+        // opcode.
+        const u32 sync_target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(sync_target)));
+        break;
+    }
+    case OpCode::Id::PBK: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "Constant buffer PBK is not supported");
+
+        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
+        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
+        // it's very unlikely a driver will emit such an invalid shader.
+        const u32 break_target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(break_target)));
+        break;
+    }
+    case OpCode::Id::SYNC: {
+        const ConditionCode cond_code = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cond_code != ConditionCode::T, "SYNC condition code used: {}",
+                             static_cast<u32>(cond_code));
+
+        // SYNC jumps to the address a previous SSY pushed onto the flow stack
+        bb.push_back(Operation(OperationCode::PopFlowStack));
+        break;
+    }
+    case OpCode::Id::BRK: {
+        const ConditionCode cond_code = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cond_code != ConditionCode::T, "BRK condition code used: {}",
+                             static_cast<u32>(cond_code));
+
+        // BRK jumps to the address a previous PBK pushed onto the flow stack
+        bb.push_back(Operation(OperationCode::PopFlowStack));
+        break;
+    }
+    case OpCode::Id::IPA: {
+        const auto& attr_fmt = instr.attribute.fmt28;
+        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
+                                                instr.ipa.sample_mode.Value()};
+
+        Node value = GetInputAttribute(attr_fmt.index, attr_fmt.element, input_mode);
+        const Tegra::Shader::Attribute::Index index = attr_fmt.index.Value();
+        const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+                                index <= Tegra::Shader::Attribute::Index::Attribute_31;
+        // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
+        // In theory by setting them as perspective, OpenGL does the perspective correction.
+        // A way must be figured out to reverse the last step of it.
+        if (is_generic &&
+            input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
+            value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+        }
+
+        value = GetSaturatedFloat(value, instr.ipa.saturate);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::OUT_R: {
+        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
+                             "Stream buffer is not supported");
+
+        if (instr.out.emit) {
+            // gpr0 receives the next address and gpr8 holds the address to emit. Hardware
+            // works with pointers here; we ignore them and just write zero to gpr0
+            bb.push_back(Operation(OperationCode::EmitVertex));
+            SetRegister(bb, instr.gpr0, Immediate(0));
+        }
+        if (instr.out.cut) {
+            bb.push_back(Operation(OperationCode::EndPrimitive));
+        }
+        break;
+    }
+    case OpCode::Id::ISBERD: {
+        UNIMPLEMENTED_IF(instr.isberd.o != 0);
+        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
+        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
+        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
+        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
+        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
+        break;
+    }
+    case OpCode::Id::DEPBAR: {
+        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
new file mode 100644
index 000000000..83c61680e
--- /dev/null
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -0,0 +1,67 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+// Decodes PSETP and CSETP, which combine predicates (or a condition code and a predicate)
+// into up to two destination predicates. Returns pc unchanged.
+u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::PSETP: {
+        const Node lhs = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+        const Node rhs = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+
+        // The primary destination can never be the constant predicate.
+        ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+        const Node chained = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+
+        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
+        const Node first_result = Operation(combiner, lhs, rhs);
+
+        // Primary destination: (lhs OP rhs) OP chained
+        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, first_result, chained));
+
+        if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+            // Secondary destination, if enabled: !(lhs OP rhs) OP chained
+            const Node negated = Operation(OperationCode::LogicalNegate, first_result);
+            SetPredicate(bb, instr.psetp.pred0, Operation(combiner, negated, chained));
+        }
+        break;
+    }
+    case OpCode::Id::CSETP: {
+        const Node chained = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+        const Node cc_value = GetConditionCode(instr.csetp.cc);
+
+        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
+
+        if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
+            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, cc_value, chained));
+        }
+        if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+            const Node negated_cc = Operation(OperationCode::LogicalNegate, cc_value);
+            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, negated_cc, chained));
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
new file mode 100644
index 000000000..d0495995d
--- /dev/null
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -0,0 +1,46 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes PSET: writes the combined predicate to gpr0 as a float or integer boolean.
+u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in PSET is not implemented");
+
+    const Node lhs = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+    const Node rhs = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+    const Node inner = Operation(GetPredicateCombiner(instr.pset.cond), lhs, rhs);
+    const Node chained = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
+    const Node condition = Operation(combiner, inner, chained);
+
+    // With bf set the result is 1.0f / 0.0f; otherwise it is all bits set / zero.
+    const bool as_float = instr.pset.bf != 0;
+    const Node on_true = as_float ? Immediate(1.0f) : Immediate(0xffffffff);
+    const Node on_false = as_float ? Immediate(0.0f) : Immediate(0);
+    const Node result = Operation(OperationCode::Select, PRECISE, condition, on_true, on_false);
+
+    if (as_float) {
+        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
+    } else {
+        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
+    }
+    SetRegister(bb, instr.gpr0, result);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
new file mode 100644
index 000000000..f070e8912
--- /dev/null
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -0,0 +1,51 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes R2P: copies bits of gpr8 into the predicates selected by the immediate mask.
+// Returns pc unchanged.
+u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
+
+    // R2P_IMM is the only opcode handled here; anything else is flagged as unreachable.
+    if (opcode->get().GetId() != OpCode::Id::R2P_IMM) {
+        UNREACHABLE();
+    }
+    const Node apply_mask = Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+
+    const Node source = GetRegister(instr.gpr8);
+    const auto byte_offset = static_cast<u32>(instr.r2p.byte) * 8;
+
+    constexpr u32 programmable_preds = 7;
+    for (u32 bit = 0; bit < programmable_preds; ++bit) {
+        // The predicate is only written when its bit in the mask is set.
+        const Node mask_bit = BitfieldExtract(apply_mask, bit, 1);
+        const Node is_selected =
+            Operation(OperationCode::LogicalUNotEqual, mask_bit, Immediate(0));
+
+        // The new predicate value is the matching bit of the selected register byte.
+        const Node source_bit = BitfieldExtract(source, byte_offset + bit, 1);
+        const Node value = Operation(OperationCode::LogicalUNotEqual, source_bit, Immediate(0));
+
+        const Node assign =
+            Operation(OperationCode::LogicalAssign, GetPredicate(static_cast<u64>(bit)), value);
+        bb.push_back(Conditional(is_selected, {assign}));
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
new file mode 100644
index 000000000..951e85f44
--- /dev/null
+++ b/src/video_core/shader/decode/shift.cpp
@@ -0,0 +1,55 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes SHR/SHL: shifts gpr8 by an immediate, register or constant buffer operand and
+// stores the result in gpr0. Returns pc unchanged.
+u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    const Node source = GetRegister(instr.gpr8);
+    const Node amount = [&]() -> Node {
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        }
+        return instr.is_b_gpr ? GetRegister(instr.gpr20)
+                              : GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::SHR_C:
+    case OpCode::Id::SHR_R:
+    case OpCode::Id::SHR_IMM: {
+        const Node shifted = SignedOperation(OperationCode::IArithmeticShiftRight,
+                                             instr.shift.is_signed, PRECISE, source, amount);
+        SetInternalFlagsFromInteger(bb, shifted, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, shifted);
+        break;
+    }
+    case OpCode::Id::SHL_C:
+    case OpCode::Id::SHL_R:
+    case OpCode::Id::SHL_IMM: {
+        const Node shifted = Operation(OperationCode::ILogicalShiftLeft, PRECISE, source, amount);
+        SetInternalFlagsFromInteger(bb, shifted, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, shifted);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a775b402b
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,598 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <vector>
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+using Tegra::Shader::TextureMiscMode;
+using Tegra::Shader::TextureProcessMode;
+using Tegra::Shader::TextureType;
+
+// Returns the coordinate count for a texture type; unhandled types are reported and yield 0.
+static std::size_t GetCoordCount(TextureType texture_type) {
+    if (texture_type == TextureType::Texture1D) {
+        return 1;
+    }
+    if (texture_type == TextureType::Texture2D) {
+        return 2;
+    }
+    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
+        return 3;
+    }
+    UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
+    return 0;
+}
+
+// Decodes the texture instruction at pc (TEX, TEXS, TLD4, TLD4S, TXQ, TMML or TLDS), appending
+// its IR nodes to bb. Returns pc unchanged.
+u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::TEX: {
+        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
+        }
+
+        const TextureType texture_type{instr.tex.texture_type};
+        const bool is_array = instr.tex.array != 0;
+        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex.GetTextureProcessMode();
+        WriteTexInstructionFloat(
+            bb, instr,
+            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
+        break;
+    }
+    case OpCode::Id::TEXS: {
+        const TextureType texture_type{instr.texs.GetTextureType()};
+        const bool is_array{instr.texs.IsArrayTexture()};
+        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.texs.GetTextureProcessMode();
+
+        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
+        }
+
+        const Node4 components =
+            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
+
+        if (instr.texs.fp32_flag) {
+            WriteTexsInstructionFloat(bb, instr, components);
+        } else {
+            WriteTexsInstructionHalfFloat(bb, instr, components);
+        }
+        break;
+    }
+    case OpCode::Id::TLD4: {
+        ASSERT(instr.tld4.array == 0);
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
+                             "NDV is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
+                             "PTP is not implemented");
+
+        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
+        }
+
+        const auto texture_type = instr.tld4.texture_type.Value();
+        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        const bool is_array = instr.tld4.array != 0;
+        const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        WriteTexInstructionFloat(
+            bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
+        break;
+    }
+    case OpCode::Id::TLD4S: {
+        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
+        }
+
+        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+        const Node op_a = GetRegister(instr.gpr8);
+        const Node op_b = GetRegister(instr.gpr20);
+
+        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+        std::vector<Node> coords;
+        if (depth_compare) {
+            // Note: TLD4S coordinate encoding works just like TEXS's
+            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
+            coords.push_back(op_a);
+            coords.push_back(op_y);
+            coords.push_back(op_b);
+        } else {
+            coords.push_back(op_a);
+            coords.push_back(op_b);
+        }
+        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
+
+        const auto& sampler =
+            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+
+        Node4 values;
+        for (u32 element = 0; element < values.size(); ++element) {
+            auto coords_copy = coords;
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
+            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
+        }
+
+        WriteTexsInstructionFloat(bb, instr, values);
+        break;
+    }
+    case OpCode::Id::TXQ: {
+        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
+        }
+
+        // TODO: The new commits on the texture refactor, change the way samplers work.
+        // Sadly, not all texture instructions specify the type of texture their sampler
+        // uses. This must be fixed at a later instance.
+        const auto& sampler =
+            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+
+        u32 indexer = 0;
+        switch (instr.txq.query_type) {
+        case Tegra::Shader::TextureQueryType::Dimension: {
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+                const Node value =
+                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+                SetTemporal(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+            }
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
+                              static_cast<u32>(instr.txq.query_type.Value()));
+        }
+        break;
+    }
+    case OpCode::Id::TMML: {
+        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+                             "NDV is not implemented");
+
+        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+        }
+
+        auto texture_type = instr.tmml.texture_type.Value();
+        const bool is_array = instr.tmml.array != 0;
+        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+        std::vector<Node> coords;
+        // TODO: Add coordinates for different samplers once other texture types are implemented.
+        switch (texture_type) {
+        case TextureType::Texture1D:
+            coords.push_back(GetRegister(instr.gpr8));
+            break;
+        case TextureType::Texture2D:
+            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
+            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
+
+            // Fallback to interpreting as a 2D texture for now
+            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
+            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+            texture_type = TextureType::Texture2D;
+        }
+
+        for (u32 element = 0; element < 2; ++element) {
+            auto params = coords;
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
+            SetTemporal(bb, element, value);
+        }
+        for (u32 element = 0; element < 2; ++element) {
+            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
+        }
+        break;
+    }
+    case OpCode::Id::TLDS: {
+        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+        const bool is_array{instr.tlds.IsArrayTexture()};
+
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
+
+        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
+        }
+
+        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+// Returns the entry tracked for this sampler's offset, creating it on first use.
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
+                                    bool is_array, bool is_shadow) {
+    const auto offset = static_cast<std::size_t>(sampler.index.Value());
+
+    // Reuse the entry when this offset was seen before; its properties must match.
+    for (const Sampler& known : used_samplers) {
+        if (known.GetOffset() == offset) {
+            ASSERT(known.GetType() == type && known.IsArray() == is_array &&
+                   known.IsShadow() == is_shadow);
+            return known;
+        }
+    }
+
+    // First use: the new entry's index is the number of samplers tracked so far.
+    const std::size_t next_index = used_samplers.size();
+    const Sampler entry{offset, next_index, type, is_array, is_shadow};
+    return *used_samplers.emplace(entry).first;
+}
+
+// Writes the enabled components of a texture result to consecutive registers from gpr0.
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
+    // Stage the enabled components in temporals first, packed without gaps.
+    u32 written = 0;
+    for (u32 component = 0; component < 4; ++component) {
+        if (instr.tex.IsComponentEnabled(component)) {
+            SetTemporal(bb, written++, components[component]);
+        }
+    }
+    // Then move the staged values into the real destination registers.
+    for (u32 slot = 0; slot < written; ++slot) {
+        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporal(slot));
+    }
+}
+
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
+                                         const Node4& components) {
+    // TEXS writes through a swizzle into two register pairs: the first two enabled
+    // components land in gpr0+0 and gpr0+1, the remaining ones in gpr28+0 and gpr28+1.
+    u32 written = 0;
+    for (u32 component = 0; component < 4; ++component) {
+        if (instr.texs.IsComponentEnabled(component)) {
+            SetTemporal(bb, written++, components[component]);
+        }
+    }
+
+    for (u32 slot = 0; slot < written; ++slot) {
+        const u32 lane = slot % 2;
+        if (slot >= 2) {
+            // Third and fourth swizzle components need the second destination pair
+            ASSERT(instr.texs.HasTwoDestinations());
+            SetRegister(bb, instr.gpr28.Value() + lane, GetTemporal(slot));
+        } else {
+            // First and second swizzle components go to gpr0 and gpr0+1
+            SetRegister(bb, instr.gpr0.Value() + lane, GetTemporal(slot));
+        }
+    }
+}
+
+void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
+                                             const Node4& components) {
+    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
+    // float instruction).
+    Node4 halves;
+    u32 count = 0;
+    for (u32 component = 0; component < 4; ++component) {
+        if (instr.texs.IsComponentEnabled(component)) {
+            halves[count++] = components[component];
+        }
+    }
+    if (count == 0)
+        return;
+    // Pad the unused slots with zero so that complete pairs can be packed
+    for (u32 slot = count; slot < halves.size(); ++slot) {
+        halves[slot] = Immediate(0);
+    }
+
+    const Node low_pair = Operation(OperationCode::HPack2, halves[0], halves[1]);
+    if (count <= 2) {
+        SetRegister(bb, instr.gpr0, low_pair);
+        return;
+    }
+
+    SetTemporal(bb, 0, low_pair);
+    SetTemporal(bb, 1, Operation(OperationCode::HPack2, halves[2], halves[3]));
+    SetRegister(bb, instr.gpr0, GetTemporal(0));
+    SetRegister(bb, instr.gpr28, GetTemporal(1));
+}
+
+// Builds one texture read operation per result component for the given sampling parameters.
+Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
+                               TextureProcessMode process_mode, std::vector<Node> coords,
+                               Node array, Node depth_compare, u32 bias_offset,
+                               std::vector<Node> aoffi) {
+    const bool is_array = array;
+    const bool is_shadow = depth_compare;
+    const bool is_array_shadow = is_array && is_shadow;
+
+    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
+                             (texture_type == TextureType::TextureCube && is_array_shadow),
+                         "This method is not supported.");
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+
+    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
+                            process_mode == TextureProcessMode::LL ||
+                            process_mode == TextureProcessMode::LLA;
+
+    // LOD selection (either via bias or explicit textureLod) is not supported in GL for
+    // sampler2DArrayShadow and samplerCubeArrayShadow.
+    const bool gl_lod_supported = !(is_array_shadow && (texture_type == TextureType::Texture2D ||
+                                                        texture_type == TextureType::TextureCube));
+
+    const OperationCode read_method =
+        (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
+
+    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
+
+    Node bias = {};
+    Node lod = {};
+    if (gl_lod_supported && process_mode != TextureProcessMode::None) {
+        switch (process_mode) {
+        case TextureProcessMode::LZ:
+            lod = Immediate(0.0f);
+            break;
+        case TextureProcessMode::LB:
+            // When present, the lod or bias value lives in the register indexed by the gpr20
+            // field, at an offset that depends on how the other registers are used
+            bias = GetRegister(instr.gpr20.Value() + bias_offset);
+            break;
+        case TextureProcessMode::LL:
+            lod = GetRegister(instr.gpr20.Value() + bias_offset);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
+        }
+    }
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto element_coords = coords;
+        MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
+        values[element] = Operation(read_method, meta, std::move(element_coords));
+    }
+
+    return values;
+}
+
+Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
+                           bool is_aoffi) {
+    // Every mode other than None and LZ reserves gpr20 for a lod or bias operand.
+    const bool lod_bias_enabled =
+        process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ;
+
+    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+    // If enabled, the array index is always stored in the gpr8 field and the first coordinate
+    // follows it; otherwise the coordinates start at gpr8 itself.
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? array_register + 1 : array_register;
+
+    std::vector<Node> coords;
+    for (u64 reg = coord_register; reg < coord_register + coord_count; ++reg) {
+        coords.push_back(GetRegister(reg));
+    }
+    // 1D.DC in OpenGL the 2nd component is ignored.
+    if (texture_type == TextureType::Texture1D && depth_compare && !is_array) {
+        coords.push_back(Immediate(0.0f));
+    }
+
+    Node array{};
+    if (is_array) {
+        array = GetRegister(array_register);
+    }
+
+    // Optional operands are packed from gpr20 onwards: [lod/bias], [aoffi], [depth compare].
+    u64 parameter_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register), coord_count, false);
+        ++parameter_register;
+    }
+
+    // Depth is stored in the first gpr20-based register left free by lod/bias and aoffi.
+    Node dc{};
+    if (depth_compare) {
+        dc = GetRegister(parameter_register);
+    }
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
+}
+
+Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
+                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+    const bool lod_bias_enabled =
+        process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ;
+    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+
+    // If enabled, the array index is always stored in the gpr8 field; the first coordinate is
+    // then stored in gpr8 + 1, otherwise in gpr8 itself.
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? array_register + 1 : array_register;
+
+    // The last coordinate is read from gpr20 for arrays, when neither lod/bias nor depth
+    // compare is used, or with more than two coordinates; otherwise from coord_register + 1.
+    const bool has_extra_operand = lod_bias_enabled || depth_compare;
+    const bool last_coord_in_gpr20 = is_array || !has_extra_operand || coord_count > 2;
+    const u64 last_coord_register =
+        last_coord_in_gpr20 ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1;
+    const u32 bias_offset = coord_count > 2 ? 1 : 0;
+
+    std::vector<Node> coords;
+    for (std::size_t index = 0; index < coord_count; ++index) {
+        const bool is_last = coord_count > 1 && index + 1 == coord_count;
+        coords.push_back(GetRegister(is_last ? last_coord_register : coord_register + index));
+    }
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+    // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+    // or bias are used.
+    Node dc{};
+    if (depth_compare) {
+        dc = GetRegister(instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0));
+    }
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
+}
+
+Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
+                            bool is_array, bool is_aoffi) {
+    // Builds one TextureGather read per element of the returned Node4.
+    const std::size_t coord_count = GetCoordCount(texture_type);
+
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+    const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+    std::vector<Node> coords;
+    coords.reserve(coord_count);
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        coords.push_back(GetRegister(coord_register + i));
+    }
+
+    // Optional operands are read from gpr20 onwards: first the AOFFI word, then depth compare.
+    u64 parameter_register = instr.gpr20.Value();
+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+    }
+
+    Node dc{};
+    if (depth_compare) {
+        dc = GetRegister(parameter_register++);
+    }
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto coords_copy = coords;
+        // NOTE(review): gpr8 is passed as the array node even when is_array is false.
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
+        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
+    }
+    return values;
+}
+
+Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
+    const std::size_t coord_total = GetCoordCount(texture_type);
+    const bool uses_lod = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+
+    // If enabled, the array index is always stored in the gpr8 field and the coordinates
+    // start at gpr20; otherwise the coordinates start at gpr8.
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
+
+    // Non-array fetches read the last coordinate from gpr20 when there are more than two
+    // coordinates, or exactly two without lod; otherwise it is read from coord_register + 1.
+    const bool gpr20_is_coord = coord_total > 2 || (coord_total == 2 && !uses_lod);
+    const u64 last_coord_register =
+        (gpr20_is_coord && !is_array) ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1;
+
+    std::vector<Node> coords;
+    for (std::size_t index = 0; index < coord_total; ++index) {
+        const bool is_last = coord_total > 1 && index + 1 == coord_total;
+        coords.push_back(GetRegister(is_last ? last_coord_register : coord_register + index));
+    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+    // When lod is used it is always in gpr20
+    const Node lod = uses_lod ? GetRegister(instr.gpr20) : Immediate(0);
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+    Node4 values;
+    u32 element = 0;
+    for (Node& value : values) {
+        MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element++};
+        value = Operation(OperationCode::TexelFetch, meta, std::vector<Node>(coords));
+    }
+    return values;
+}
+
+std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
+    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
+    std::size_t max_coords, std::size_t max_inputs) {
+    // Returns {coordinates of the texture type, coordinates plus array index and depth value}.
+    const std::size_t coord_count = GetCoordCount(texture_type);
+    const std::size_t extra_coords = (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
+    const std::size_t total_reg_count = coord_count + extra_coords + (lod_bias_enabled ? 1 : 0);
+
+    std::size_t total_coord_count = coord_count + extra_coords;
+    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
+        UNIMPLEMENTED_MSG("Unsupported Texture operation");
+        total_coord_count = std::min(total_coord_count, max_coords);
+    }
+    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
+    const bool is_1d_dc = texture_type == TextureType::Texture1D && depth_compare && !is_array;
+    return {coord_count, total_coord_count + (is_1d_dc ? 1 : 0)};
+}
+
+std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
+                                                bool is_tld4) {
+    // Per-coordinate offsets are packed as signed bitfields: TLD4 uses 6-bit fields every 8
+    // bits, every other instruction uses 4-bit fields every 4 bits.
+    const std::array<u32, 3> coord_offsets =
+        is_tld4 ? std::array<u32, 3>{0, 8, 16} : std::array<u32, 3>{0, 4, 8};
+    const u32 size = is_tld4 ? 6 : 4;
+    // Raw values at or above wrap_value are negative: subtracting diff_value sign-extends them.
+    const s32 wrap_value = is_tld4 ? 32 : 8;
+    const s32 diff_value = is_tld4 ? 64 : 16;
+    const u32 mask = (1U << size) - 1;
+
+    std::vector<Node> aoffi;
+    aoffi.reserve(coord_count);
+    const auto aoffi_immediate{
+        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
+    if (aoffi_immediate) {
+        // The packed offsets could be tracked to an immediate: emit them as immediates.
+        for (std::size_t coord = 0; coord < coord_count; ++coord) {
+            s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+            if (value >= wrap_value) {
+                value -= diff_value;
+            }
+            aoffi.push_back(Immediate(value));
+        }
+        return aoffi;
+    }
+
+    // Variable access, not supported on AMD.
+    LOG_WARNING(HW_GPU,
+                "AOFFI constant folding failed, some hardware might have graphical issues");
+    for (std::size_t coord = 0; coord < coord_count; ++coord) {
+        const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+        const Node condition =
+            Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
+        const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
+        aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
+    }
+    return aoffi;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
new file mode 100644
index 000000000..956c01d9b
--- /dev/null
+++ b/src/video_core/shader/decode/video.cpp
@@ -0,0 +1,111 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::VideoType;
+using Tegra::Shader::VmadShr;
+
+u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Operand A always comes from gpr8.
+    const Node op_a =
+        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
+                        instr.video.type_a, instr.video.byte_height_a);
+
+    // Operand B is either gpr20 or a 16-bit immediate that is sign extended when signed_b is set.
+    Node op_b{};
+    if (instr.video.use_register_b) {
+        op_b = GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
+                               instr.video.signed_b, instr.video.type_b,
+                               instr.video.byte_height_b);
+    } else if (instr.video.signed_b) {
+        const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
+        op_b = Immediate(static_cast<u32>(imm));
+    } else {
+        op_b = Immediate(instr.alu.GetImm20_16());
+    }
+    // Both VMAD and VSETP are treated as signed if any of the operands is signed.
+    const bool is_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::VMAD: {
+        // value = op_a * op_b + gpr39, optionally shifted right by 7 or 15.
+        const Node op_c = GetRegister(instr.gpr39);
+        const Node product =
+            SignedOperation(OperationCode::IMul, is_signed, NO_PRECISE, op_a, op_b);
+        Node value = SignedOperation(OperationCode::IAdd, is_signed, NO_PRECISE, product, op_c);
+
+        const bool shr7 = instr.vmad.shr == VmadShr::Shr7;
+        if (shr7 || instr.vmad.shr == VmadShr::Shr15) {
+            const Node shift = Immediate(shr7 ? 7 : 15);
+            value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, value, shift);
+        }
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::VSETP: {
+        // We can't use the constant predicate as destination.
+        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+        const Node first_pred =
+            GetPredicateComparisonInteger(instr.vsetp.cond, is_signed, op_a, op_b);
+        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
+        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
+
+        // Set the primary predicate to the result of Predicate OP SecondPredicate
+        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
+
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
+        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
+            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
+                               Tegra::Shader::VideoType type, u64 byte_height) {
+    // Extracts the operand bits selected by the encoding; is_signed is currently not read.
+    if (!is_chunk) {
+        // Byte operand: the 8 bits starting at bit byte_height * 8.
+        return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
+    }
+    const Node zero = Immediate(0);
+    if (type == Tegra::Shader::VideoType::Size16_Low) {
+        return BitfieldExtract(op, 0, 16);
+    }
+    if (type == Tegra::Shader::VideoType::Size16_High) {
+        return BitfieldExtract(op, 16, 16);
+    }
+    if (type == Tegra::Shader::VideoType::Size32) {
+        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
+        // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
+        UNIMPLEMENTED();
+    } else if (type == Tegra::Shader::VideoType::Invalid) {
+        UNREACHABLE_MSG("Invalid instruction encoding");
+    } else {
+        UNREACHABLE();
+    }
+    return zero;
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
new file mode 100644
index 000000000..db15c0718
--- /dev/null
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -0,0 +1,119 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // XMAD: multiplies 16-bit halves of A and B and adds a mode dependent form of C.
+    UNIMPLEMENTED_IF(instr.xmad.sign_a);
+    UNIMPLEMENTED_IF(instr.xmad.sign_b);
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in XMAD is not implemented");
+
+    Node op_a = GetRegister(instr.gpr8);
+
+    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
+    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
+    const bool is_signed_a = instr.xmad.sign_a == 1;
+    const bool is_signed_b = instr.xmad.sign_b == 1;
+    const bool is_signed_c = is_signed_a;
+    // The flag fields and the sources of B and C depend on the encoding (CR, RR, RC or IMM).
+    auto [is_merge, is_psl, is_high_b, mode, op_b,
+          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::XMAD_CR:
+            return {instr.xmad.merge_56,
+                    instr.xmad.product_shift_left_second,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                    GetRegister(instr.gpr39)};
+        case OpCode::Id::XMAD_RR:
+            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
+                    instr.xmad.mode,     GetRegister(instr.gpr20),      GetRegister(instr.gpr39)};
+        case OpCode::Id::XMAD_RC:
+            return {false,
+                    false,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
+                    GetRegister(instr.gpr39),
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+        case OpCode::Id::XMAD_IMM:
+            return {instr.xmad.merge_37,
+                    instr.xmad.product_shift_left,
+                    false,
+                    instr.xmad.mode,
+                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+                    GetRegister(instr.gpr39)};
+        }
+        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+        return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
+    }();
+    // Only the selected 16-bit half of each multiplicand takes part in the product.
+    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
+
+    const Node original_b = op_b;
+    op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
+
+    // TODO(Rodrigo): Use an appropriate sign for this operation
+    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
+    if (is_psl) {
+        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
+    }
+    SetTemporal(bb, 0, product);
+    product = GetTemporal(0);
+
+    const Node original_c = op_c;
+    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
+    op_c = [&]() {
+        switch (set_mode) {
+        case Tegra::Shader::XmadMode::None:
+            return original_c;
+        case Tegra::Shader::XmadMode::CLo:
+            return BitfieldExtract(original_c, 0, 16);
+        case Tegra::Shader::XmadMode::CHi:
+            return BitfieldExtract(original_c, 16, 16);
+        case Tegra::Shader::XmadMode::CBcc: {
+            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+                                                   NO_PRECISE, original_b, Immediate(16));
+            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
+                                   shifted_b);
+        }
+        default:
+            // Log the mode that was actually decoded: CR/RC encodings read it from mode_cbf.
+            UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(set_mode));
+            return Immediate(0);
+        }
+    }();
+    SetTemporal(bb, 1, op_c);
+    op_c = GetTemporal(1);
+
+    // TODO(Rodrigo): Use an appropriate sign for this operation
+    Node sum = Operation(OperationCode::IAdd, product, op_c);
+    SetTemporal(bb, 2, sum);
+    sum = GetTemporal(2);
+    if (is_merge) {
+        const Node a = BitfieldExtract(sum, 0, 16);
+        const Node b =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
+        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
+    }
+
+    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
+    SetRegister(bb, instr.gpr0, sum);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
new file mode 100644
index 000000000..ac5112d78
--- /dev/null
+++ b/src/video_core/shader/shader_ir.cpp
@@ -0,0 +1,444 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cmath>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Attribute;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::IpaMode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::PredOperation;
+using Tegra::Shader::Register;
+
+Node ShaderIR::StoreNode(NodeData&& node_data) {
+    // Takes ownership of the payload (moved, not copied); the raw pointer is the node handle.
+    auto store = std::make_unique<NodeData>(std::move(node_data));
+    stored_nodes.push_back(std::move(store));
+    return stored_nodes.back().get();
+}
+
+Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
+    return StoreNode(ConditionalNode(condition, std::move(code))); // code is moved into the node
+}
+
+Node ShaderIR::Comment(const std::string& text) {
+    return StoreNode(CommentNode(text)); // Stored through StoreNode like the other node kinds
+}
+
+Node ShaderIR::Immediate(u32 value) {
+    return StoreNode(ImmediateNode(value)); // A new node is stored on every call
+}
+
+Node ShaderIR::GetRegister(Register reg) {
+    if (reg != Register::ZeroIndex) { // ZeroIndex is never recorded in used_registers
+        used_registers.insert(static_cast<u32>(reg));
+    }
+    return StoreNode(GprNode(reg)); // A new GprNode is stored on every call
+}
+
+Node ShaderIR::GetImmediate19(Instruction instr) {
+    return Immediate(instr.alu.GetImm20_19()); // Immediate node holding GetImm20_19()
+}
+
+Node ShaderIR::GetImmediate32(Instruction instr) {
+    return Immediate(instr.alu.GetImm20_32()); // Immediate node holding GetImm20_32()
+}
+
+Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
+    const u32 cbuf_index = static_cast<u32>(index_);
+    const u32 cbuf_offset = static_cast<u32>(offset_);
+
+    // Register the access in used_cbufs, creating the entry on first use of this buffer.
+    used_cbufs.try_emplace(cbuf_index).first->second.MarkAsUsed(cbuf_offset);
+    const Node offset_node = Immediate(cbuf_offset);
+    return StoreNode(CbufNode(cbuf_index, offset_node));
+}
+
+Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
+    const u32 cbuf_index = static_cast<u32>(index_);
+    const u32 base_offset = static_cast<u32>(offset_);
+
+    // The offset depends on `node`, so the buffer is flagged through MarkAsUsedIndirect.
+    used_cbufs.try_emplace(cbuf_index).first->second.MarkAsUsedIndirect();
+
+    const Node base = Immediate(base_offset);
+    return StoreNode(CbufNode(cbuf_index, Operation(OperationCode::UAdd, NO_PRECISE, node, base)));
+}
+
+Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
+    const Pred predicate = static_cast<Pred>(pred_);
+    // UnusedIndex and NeverExecute are not recorded in used_predicates.
+    if (!(predicate == Pred::UnusedIndex || predicate == Pred::NeverExecute)) {
+        used_predicates.insert(predicate);
+    }
+    return StoreNode(PredicateNode(predicate, negated));
+}
+
+Node ShaderIR::GetPredicate(bool immediate) { // true -> UnusedIndex, false -> NeverExecute
+    return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
+}
+
+Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
+                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
+    // Record every IpaMode this attribute is read with.
+    auto& used_modes =
+        used_input_attributes.emplace(index, std::set<Tegra::Shader::IpaMode>{}).first->second;
+    used_modes.insert(input_mode);
+    return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
+}
+
+Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
+    if (index == Attribute::Index::ClipDistances0123 ||
+        index == Attribute::Index::ClipDistances4567) {
+        // Each clip distance attribute holds four distances: the second bank starts at index 4.
+        const u64 bank_base = index == Attribute::Index::ClipDistances4567 ? 4 : 0;
+        used_clip_distances.at(static_cast<u32>(bank_base + element)) = true;
+    }
+    used_output_attributes.insert(index);
+
+    return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
+}
+
+Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
+    // The flag node is always stored; when negated it is wrapped in a LogicalNegate
+    // operation instead of being returned directly.
+    const Node flag_node = StoreNode(InternalFlagNode(flag));
+
+    return negated ? Operation(OperationCode::LogicalNegate, flag_node) : flag_node;
+}
+
+Node ShaderIR::GetLocalMemory(Node address) {
+    return StoreNode(LmemNode(address)); // address is itself a node, not a constant
+}
+
+Node ShaderIR::GetTemporal(u32 id) {
+    return GetRegister(Register::ZeroIndex + 1 + id); // Temporal id maps to ZeroIndex + 1 + id
+}
+
+Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
+    // Absolute value is applied first, so both flags together negate the absolute value.
+    const Node magnitude =
+        absolute ? Operation(OperationCode::FAbsolute, NO_PRECISE, value) : value;
+    if (!negate) {
+        return magnitude;
+    }
+    return Operation(OperationCode::FNegate, NO_PRECISE, magnitude);
+}
+
+Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
+    if (saturate) {
+        // Clamp to [+0.0, 1.0]; copysignf makes the sign of the lower bound explicit.
+        const Node zero = Immediate(std::copysignf(0, 1));
+        value = Operation(OperationCode::FClamp, NO_PRECISE, value, zero, Immediate(1.0f));
+    }
+    return value;
+}
+
+Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
+    // Narrows value to the given size by shifting it to the top of the word and back down.
+    const auto truncate = [&](s32 shift) {
+        const Node shifted = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+                                             NO_PRECISE, value, Immediate(shift));
+        return SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
+                               shifted, Immediate(shift));
+    };
+    switch (size) {
+    case Register::Size::Byte:
+        return truncate(24);
+    case Register::Size::Short:
+        return truncate(16);
+    case Register::Size::Word:
+        // Default - do nothing
+        return value;
+    default:
+        UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+        return value;
+    }
+}
+
+Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
+    // Absolute or negate on an unsigned is pointless, so the modifiers only apply when signed.
+    const bool apply_abs = is_signed && absolute;
+    const bool apply_neg = is_signed && negate;
+
+    // Absolute value goes first, so both flags together negate the absolute value.
+    const Node magnitude =
+        apply_abs ? Operation(OperationCode::IAbsolute, NO_PRECISE, value) : value;
+    if (apply_neg) {
+        return Operation(OperationCode::INegate, NO_PRECISE, magnitude);
+    }
+    return magnitude;
+}
+
+Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
+    const Node packed = Immediate(instr.half_imm.PackImmediates());
+    if (has_negation) {
+        // Each half has its own negation bit, handed to HNegate as a constant predicate.
+        const Node first = GetPredicate(instr.half_imm.first_negate != 0);
+        const Node second = GetPredicate(instr.half_imm.second_negate != 0);
+        return Operation(OperationCode::HNegate, HALF_NO_PRECISE, packed, first, second);
+    }
+    return packed;
+}
+
+Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
+    // H0_H1 passes src through untouched; the other known modes emit a merge operation.
+    if (merge == Tegra::Shader::HalfMerge::H0_H1) {
+        return src;
+    } else if (merge == Tegra::Shader::HalfMerge::F32) {
+        return Operation(OperationCode::HMergeF32, src);
+    } else if (merge == Tegra::Shader::HalfMerge::Mrg_H0) {
+        return Operation(OperationCode::HMergeH0, dest, src);
+    } else if (merge == Tegra::Shader::HalfMerge::Mrg_H1) {
+        return Operation(OperationCode::HMergeH1, dest, src);
+    }
+    UNREACHABLE();
+    return src;
+}
+
+Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
+    const Node magnitude =
+        absolute ? Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value) : value;
+    if (!negate) {
+        return magnitude;
+    }
+    // Both predicate operands of HNegate are the constant true predicate.
+    return Operation(OperationCode::HNegate, HALF_NO_PRECISE, magnitude, GetPredicate(true),
+                     GetPredicate(true));
+}
+
+Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
+    // The *WithNan conditions share the operation of their plain counterpart in this table.
+    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
+        {PredCondition::Equal, OperationCode::LogicalFEqual},
+        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
+        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
+        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
+        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
+        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
+        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
+        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
+        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
+        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};
+    const auto comparison = PredicateComparisonTable.find(condition);
+    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+                         "Unknown predicate comparison operation");
+    const Node ordered = Operation(comparison->second, NO_PRECISE, op_a, op_b);
+    switch (condition) {
+    case PredCondition::LessThanWithNan:
+    case PredCondition::NotEqualWithNan:
+    case PredCondition::LessEqualWithNan:
+    case PredCondition::GreaterThanWithNan:
+    case PredCondition::GreaterEqualWithNan: {
+        // The *WithNan conditions also pass when either operand is NaN.
+        const Node with_a = Operation(OperationCode::LogicalOr, ordered,
+                                      Operation(OperationCode::LogicalFIsNan, op_a));
+        return Operation(OperationCode::LogicalOr, with_a,
+                         Operation(OperationCode::LogicalFIsNan, op_b));
+    }
+    default:
+        return ordered;
+    }
+}
+
+Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
+                                             Node op_b) {
+    // The *WithNan conditions are mapped too, but they are reported as unimplemented below.
+    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+        {PredCondition::LessThan, OperationCode::LogicalILessThan},
+        {PredCondition::Equal, OperationCode::LogicalIEqual},
+        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
+        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
+        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
+        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
+        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
+        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
+        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
+        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
+        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};
+    const auto comparison = PredicateComparisonTable.find(condition);
+    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+                         "Unknown predicate comparison operation");
+
+    const Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);
+
+    const bool with_nan = condition == PredCondition::LessThanWithNan ||
+                          condition == PredCondition::NotEqualWithNan ||
+                          condition == PredCondition::LessEqualWithNan ||
+                          condition == PredCondition::GreaterThanWithNan ||
+                          condition == PredCondition::GreaterEqualWithNan;
+    UNIMPLEMENTED_IF_MSG(with_nan, "NaN comparisons for integers are not implemented");
+    return predicate;
+}
+
+Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
+                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {
+    // NaN-aware conditions are flagged up front; the table below still maps them.
+    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
+                             condition == PredCondition::NotEqualWithNan ||
+                             condition == PredCondition::LessEqualWithNan ||
+                             condition == PredCondition::GreaterThanWithNan ||
+                             condition == PredCondition::GreaterEqualWithNan,
+                         "Unimplemented NaN comparison for half floats");
+
+    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
+        {PredCondition::Equal, OperationCode::Logical2HEqual},
+        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
+        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
+        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
+        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
+        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
+        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
+        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
+        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
+        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};
+    // Conditions absent from the table are flagged before the iterator is dereferenced.
+    const auto comparison{PredicateComparisonTable.find(condition)};
+    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+                         "Unknown predicate comparison operation");
+    // meta is forwarded to the comparison node together with both operands.
+    const Node predicate = Operation(comparison->second, meta, op_a, op_b);
+
+    return predicate;
+}
+
+OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
+    static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
+        {PredOperation::And, OperationCode::LogicalAnd},
+        {PredOperation::Or, OperationCode::LogicalOr},
+        {PredOperation::Xor, OperationCode::LogicalXor},
+    };
+    // Only And, Or and Xor are mapped; anything else is flagged before op is dereferenced.
+    const auto op = PredicateOperationTable.find(operation);
+    UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
+    return op->second;
+}
+
+Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
+    // NEU is the only condition code handled; it reads the Zero flag with negated = true.
+    if (cc == Tegra::Shader::ConditionCode::NEU) {
+        return GetInternalFlag(InternalFlag::Zero, true);
+    }
+    // Every other code is flagged and replaced by the NeverExecute predicate.
+    UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
+    return GetPredicate(static_cast<u64>(Pred::NeverExecute));
+}
+
+void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { // Appends dest = src
+    bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
+}
+
+void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { // Appends predicate = src
+    bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
+}
+
+void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { // flag = value
+    bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
+}
+
+void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { // lmem = value
+    bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
+}
+
+void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
+    SetRegister(bb, Register::ZeroIndex + 1 + id, value); // Temporals sit right after ZeroIndex
+}
+
+void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
+    // Only the Zero flag is derived (value == 0.0f); nothing is emitted unless sets_cc is set.
+    if (sets_cc) {
+        const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
+        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
+        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
+    }
+}
+
+void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
+    // Only the Zero flag is derived (value == 0); nothing is emitted unless sets_cc is set.
+    if (sets_cc) {
+        const Node is_zero = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
+        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
+        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
+    }
+}
+
+Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
+    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
+                     Immediate(bits)); // Always the unsigned variant; operands are immediates.
+}
+
+/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
+                                                        bool is_signed) {
+    if (is_signed) { // Signed requests keep the opcode they came with.
+        return operation_code;
+    }
+    switch (operation_code) { // Translate each signed opcode into its unsigned counterpart.
+    case OperationCode::FCastInteger:
+        return OperationCode::FCastUInteger;
+    case OperationCode::IAdd:
+        return OperationCode::UAdd;
+    case OperationCode::IMul:
+        return OperationCode::UMul;
+    case OperationCode::IDiv:
+        return OperationCode::UDiv;
+    case OperationCode::IMin:
+        return OperationCode::UMin;
+    case OperationCode::IMax:
+        return OperationCode::UMax;
+    case OperationCode::ICastFloat:
+        return OperationCode::UCastFloat;
+    case OperationCode::ICastUnsigned:
+        return OperationCode::UCastSigned;
+    case OperationCode::ILogicalShiftLeft:
+        return OperationCode::ULogicalShiftLeft;
+    case OperationCode::ILogicalShiftRight:
+        return OperationCode::ULogicalShiftRight;
+    case OperationCode::IArithmeticShiftRight:
+        return OperationCode::UArithmeticShiftRight;
+    case OperationCode::IBitwiseAnd:
+        return OperationCode::UBitwiseAnd;
+    case OperationCode::IBitwiseOr:
+        return OperationCode::UBitwiseOr;
+    case OperationCode::IBitwiseXor:
+        return OperationCode::UBitwiseXor;
+    case OperationCode::IBitwiseNot:
+        return OperationCode::UBitwiseNot;
+    case OperationCode::IBitfieldInsert: // NOTE(review): IBitfieldExtract is not mapped; confirm.
+        return OperationCode::UBitfieldInsert;
+    case OperationCode::IBitCount:
+        return OperationCode::UBitCount;
+    case OperationCode::LogicalILessThan:
+        return OperationCode::LogicalULessThan;
+    case OperationCode::LogicalIEqual:
+        return OperationCode::LogicalUEqual;
+    case OperationCode::LogicalILessEqual:
+        return OperationCode::LogicalULessEqual;
+    case OperationCode::LogicalIGreaterThan:
+        return OperationCode::LogicalUGreaterThan;
+    case OperationCode::LogicalINotEqual:
+        return OperationCode::LogicalUNotEqual;
+    case OperationCode::LogicalIGreaterEqual:
+        return OperationCode::LogicalUGreaterEqual;
+    case OperationCode::INegate: // NOTE(review): falls through if UNREACHABLE_MSG returns.
+        UNREACHABLE_MSG("Can't negate an unsigned integer");
+    case OperationCode::IAbsolute: // NOTE(review): falls through if UNREACHABLE_MSG returns.
+        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
+    }
+    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+    return {}; // Value-initialized OperationCode (the enumerator with value 0).
+}
+
+} // namespace VideoCommon::Shader
+\ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
new file mode 100644
index 000000000..4888998d3
--- /dev/null
+++ b/src/video_core/shader/shader_ir.h
@@ -0,0 +1,842 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <map>
+#include <optional>
+#include <set>
+#include <string>
+#include <tuple>
+#include <variant>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
+
+namespace VideoCommon::Shader {
+
+class OperationNode;
+class ConditionalNode;
+class GprNode;
+class ImmediateNode;
+class InternalFlagNode;
+class PredicateNode;
+class AbufNode; ///< Attribute buffer
+class CbufNode; ///< Constant buffer
+class LmemNode; ///< Local memory
+class GmemNode; ///< Global memory
+class CommentNode;
+
+using ProgramCode = std::vector<u64>; // One u64 per element of the shader program.
+
+using NodeData =
+    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
+                 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
+using Node = const NodeData*; // Non-owning, read-only handle to a node.
+using Node4 = std::array<Node, 4>;
+using NodeBlock = std::vector<Node>; // Ordered list of nodes.
+
+constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; // NOTE(review): unit not shown here; confirm.
+
+enum class OperationCode {
+    Assign, /// (float& dest, float src) -> void
+
+    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
+
+    FAdd,          /// (MetaArithmetic, float a, float b) -> float
+    FMul,          /// (MetaArithmetic, float a, float b) -> float
+    FDiv,          /// (MetaArithmetic, float a, float b) -> float
+    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
+    FNegate,       /// (MetaArithmetic, float a) -> float
+    FAbsolute,     /// (MetaArithmetic, float a) -> float
+    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
+    FMin,          /// (MetaArithmetic, float a, float b) -> float
+    FMax,          /// (MetaArithmetic, float a, float b) -> float
+    FCos,          /// (MetaArithmetic, float a) -> float
+    FSin,          /// (MetaArithmetic, float a) -> float
+    FExp2,         /// (MetaArithmetic, float a) -> float
+    FLog2,         /// (MetaArithmetic, float a) -> float
+    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
+    FSqrt,         /// (MetaArithmetic, float a) -> float
+    FRoundEven,    /// (MetaArithmetic, float a) -> float
+    FFloor,        /// (MetaArithmetic, float a) -> float
+    FCeil,         /// (MetaArithmetic, float a) -> float
+    FTrunc,        /// (MetaArithmetic, float a) -> float
+    FCastInteger,  /// (MetaArithmetic, int a) -> float
+    FCastUInteger, /// (MetaArithmetic, uint a) -> float
+
+    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
+    IMul,                  /// (MetaArithmetic, int a, int b) -> int
+    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
+    INegate,               /// (MetaArithmetic, int a) -> int
+    IAbsolute,             /// (MetaArithmetic, int a) -> int
+    IMin,                  /// (MetaArithmetic, int a, int b) -> int
+    IMax,                  /// (MetaArithmetic, int a, int b) -> int
+    ICastFloat,            /// (MetaArithmetic, float a) -> int
+    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
+    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
+    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
+    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
+    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
+    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
+    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
+    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
+    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
+    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
+    IBitCount,             /// (MetaArithmetic, int a) -> int
+
+    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
+    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
+    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
+    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
+    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
+    UCastFloat,            /// (MetaArithmetic, float a) -> uint
+    UCastSigned,           /// (MetaArithmetic, int a) -> uint
+    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
+    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
+    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
+    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
+    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
+    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
+    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
+    UBitfieldInsert,  /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
+    UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
+    UBitCount,        /// (MetaArithmetic, uint a) -> uint
+
+    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+    HAbsolute, /// (f16vec2 a) -> f16vec2
+    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
+    HMergeF32, /// (f16vec2 src) -> float
+    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
+    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
+    HPack2,    /// (float a, float b) -> f16vec2
+
+    LogicalAssign, /// (bool& dst, bool src) -> void
+    LogicalAnd,    /// (bool a, bool b) -> bool
+    LogicalOr,     /// (bool a, bool b) -> bool
+    LogicalXor,    /// (bool a, bool b) -> bool
+    LogicalNegate, /// (bool a) -> bool
+    LogicalPick2,  /// (bool2 pair, uint index) -> bool
+    LogicalAll2,   /// (bool2 a) -> bool
+    LogicalAny2,   /// (bool2 a) -> bool
+
+    LogicalFLessThan,     /// (float a, float b) -> bool
+    LogicalFEqual,        /// (float a, float b) -> bool
+    LogicalFLessEqual,    /// (float a, float b) -> bool
+    LogicalFGreaterThan,  /// (float a, float b) -> bool
+    LogicalFNotEqual,     /// (float a, float b) -> bool
+    LogicalFGreaterEqual, /// (float a, float b) -> bool
+    LogicalFIsNan,        /// (float a) -> bool
+
+    LogicalILessThan,     /// (int a, int b) -> bool
+    LogicalIEqual,        /// (int a, int b) -> bool
+    LogicalILessEqual,    /// (int a, int b) -> bool
+    LogicalIGreaterThan,  /// (int a, int b) -> bool
+    LogicalINotEqual,     /// (int a, int b) -> bool
+    LogicalIGreaterEqual, /// (int a, int b) -> bool
+
+    LogicalULessThan,     /// (uint a, uint b) -> bool
+    LogicalUEqual,        /// (uint a, uint b) -> bool
+    LogicalULessEqual,    /// (uint a, uint b) -> bool
+    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
+    LogicalUNotEqual,     /// (uint a, uint b) -> bool
+    LogicalUGreaterEqual, /// (uint a, uint b) -> bool
+
+    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
+    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
+    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
+    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
+    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
+    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
+
+    Texture,                /// (MetaTexture, float[N] coords) -> float4
+    TextureLod,             /// (MetaTexture, float[N] coords) -> float4
+    TextureGather,          /// (MetaTexture, float[N] coords) -> float4
+    TextureQueryDimensions, /// (MetaTexture, float a) -> float4
+    TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
+    TexelFetch,             /// (MetaTexture, int[N], int) -> float4
+
+    Branch,        /// (uint branch_target) -> void
+    PushFlowStack, /// (uint branch_target) -> void
+    PopFlowStack,  /// () -> void
+    Exit,          /// () -> void
+    Discard,       /// () -> void
+
+    EmitVertex,   /// () -> void
+    EndPrimitive, /// () -> void
+
+    YNegate, /// () -> float
+
+    Amount, /// Equals the number of operation codes declared above
+};
+
+enum class InternalFlag {
+    Zero = 0, ///< Set from "value == 0" by SetInternalFlagsFromFloat/Integer
+    Sign = 1,
+    Carry = 2,
+    Overflow = 3,
+    Amount = 4, ///< Equals the number of flags declared above
+};
+
+/// Describes how the code paths between a given entry point and a return point behave.
+enum class ExitMethod {
+    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
+    AlwaysReturn, ///< All code paths reach the return point.
+    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
+    AlwaysEnd,    ///< All code paths reach an END instruction.
+};
+
+class Sampler { // Describes one texture sampler; comparable so it can be a std::set key.
+public:
+    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+                     bool is_array, bool is_shadow)
+        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+
+    std::size_t GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetIndex() const {
+        return index;
+    }
+
+    Tegra::Shader::TextureType GetType() const {
+        return type;
+    }
+
+    bool IsArray() const {
+        return is_array;
+    }
+
+    bool IsShadow() const {
+        return is_shadow;
+    }
+
+    bool operator<(const Sampler& rhs) const { // Lexicographic over every field, in member order.
+        return std::tie(offset, index, type, is_array, is_shadow) <
+               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
+    }
+
+private:
+    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
+    /// instruction.
+    std::size_t offset{};
+    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+    Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
+    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
+    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+};
+
+class ConstBuffer { // Records the highest used offset and whether access is indirect.
+public:
+    explicit ConstBuffer(u32 max_offset, bool is_indirect)
+        : max_offset{max_offset}, is_indirect{is_indirect} {}
+
+    ConstBuffer() = default;
+    // NOTE(review): std::max below relies on <algorithm>, which this header does not include.
+    void MarkAsUsed(u64 offset) { // Raises max_offset when needed; offset is narrowed to u32.
+        max_offset = std::max(max_offset, static_cast<u32>(offset));
+    }
+
+    void MarkAsUsedIndirect() {
+        is_indirect = true;
+    }
+
+    bool IsIndirect() const {
+        return is_indirect;
+    }
+
+    u32 GetSize() const { // Highest used offset plus sizeof(float).
+        return max_offset + sizeof(float);
+    }
+
+    u32 GetMaxOffset() const {
+        return max_offset;
+    }
+
+private:
+    u32 max_offset{};   ///< Largest offset passed to MarkAsUsed (or given at construction).
+    bool is_indirect{}; ///< True once MarkAsUsedIndirect was called (or given at construction).
+};
+
+struct GlobalMemoryBase { // Ordered by (cbuf_index, cbuf_offset); usable as a std::set key.
+    u32 cbuf_index{};  // presumably the constant buffer holding the base address; TODO confirm
+    u32 cbuf_offset{}; // presumably the offset inside that constant buffer; TODO confirm
+
+    bool operator<(const GlobalMemoryBase& rhs) const {
+        return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
+    }
+};
+
+struct MetaArithmetic { // Metadata carried by float/int/uint operations; see OperationCode.
+    bool precise{}; // Defaults to false; PRECISE / NO_PRECISE are the two ready-made values.
+};
+
+struct MetaHalfArithmetic { // Metadata carried by half-float (f16vec2) operations.
+    bool precise{};
+    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
+                                                    Tegra::Shader::HalfType::H0_H1,
+                                                    Tegra::Shader::HalfType::H0_H1};
+}; // NOTE(review): types presumably holds one HalfType per operand (up to three); confirm.
+
+struct MetaTexture { // Metadata carried by texture operations; see OperationCode.
+    const Sampler& sampler; // Non-owning reference: the Sampler must outlive this object.
+    Node array{};           // Optional nodes below default to a null Node.
+    Node depth_compare{};
+    std::vector<Node> aoffi;
+    Node bias{};
+    Node lod{};
+    Node component{};
+    u32 element{};
+};
+
+constexpr MetaArithmetic PRECISE = {true};     // precise = true
+constexpr MetaArithmetic NO_PRECISE = {false}; // precise = false
+constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; // precise = false, default types
+
+using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; // Any metadata kind
+
+/// An IR operation: an opcode, its metadata and an ordered list of operand nodes
+class OperationNode final {
+public:
+    template <typename... T>
+    explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
+
+    template <typename... T>
+    explicit constexpr OperationNode(OperationCode code, Meta&& meta)
+        : code{code}, meta{std::move(meta)} {}
+
+    template <typename... T>
+    explicit constexpr OperationNode(OperationCode code, const T*... operands)
+        : OperationNode(code, {}, operands...) {}
+
+    template <typename... T>
+    explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
+        : code{code}, meta{std::move(meta)} {
+        // Copy every operand pointer into the operand list, preserving argument order.
+        auto operands_list = {operands_...};
+        for (auto& operand : operands_list) {
+            operands.push_back(operand);
+        }
+    }
+
+    explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
+        : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {}
+
+    explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
+        : code{code}, meta{}, operands{std::move(operands)} {}
+
+    OperationCode GetCode() const {
+        return code;
+    }
+
+    const Meta& GetMeta() const {
+        return meta;
+    }
+
+    std::size_t GetOperandsCount() const {
+        return operands.size();
+    }
+
+    Node operator[](std::size_t operand_index) const { // Bounds-checked through vector::at.
+        return operands.at(operand_index);
+    }
+
+private:
+    const OperationCode code;
+    const Meta meta; // A named Meta&& parameter is an lvalue, so constructors std::move it.
+    std::vector<Node> operands;
+};
+
+/// Wraps code that is executed only when a boolean-evaluated condition node is satisfied
+class ConditionalNode final {
+public:
+    explicit ConditionalNode(Node condition, std::vector<Node>&& code)
+        : condition{condition}, code{std::move(code)} {}
+
+    Node GetCondition() const {
+        return condition;
+    }
+
+    const std::vector<Node>& GetCode() const {
+        return code;
+    }
+
+private:
+    const Node condition;   ///< Condition to be satisfied
+    std::vector<Node> code; ///< Code to execute
+};
+
+/// A general purpose register, identified by its Tegra::Shader::Register
+class GprNode final {
+public:
+    explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}
+
+    u32 GetIndex() const { // The register converted to a plain u32.
+        return static_cast<u32>(index);
+    }
+
+private:
+    const Tegra::Shader::Register index;
+};
+
+/// A 32-bit immediate value, stored as a raw u32
+class ImmediateNode final {
+public:
+    explicit constexpr ImmediateNode(u32 value) : value{value} {}
+
+    u32 GetValue() const {
+        return value;
+    }
+
+private:
+    const u32 value;
+};
+
+/// One of Maxwell's internal flags (see InternalFlag)
+class InternalFlagNode final {
+public:
+    explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}
+
+    InternalFlag GetFlag() const {
+        return flag;
+    }
+
+private:
+    const InternalFlag flag;
+};
+
+/// A predicate register; the negation is stored in the node itself, so no extra node is needed
+class PredicateNode final {
+public:
+    explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
+        : index{index}, negated{negated} {}
+
+    Tegra::Shader::Pred GetIndex() const {
+        return index;
+    }
+
+    bool IsNegated() const {
+        return negated;
+    }
+
+private:
+    const Tegra::Shader::Pred index;
+    const bool negated;
+};
+
+/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
+class AbufNode final {
+public:
+    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
+                                const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
+        : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}
+    // Overload without an IpaMode: input_mode is value-initialized.
+    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
+                                Node buffer = {})
+        : input_mode{}, buffer{buffer}, index{index}, element{element} {}
+
+    Tegra::Shader::IpaMode GetInputMode() const {
+        return input_mode;
+    }
+
+    Tegra::Shader::Attribute::Index GetIndex() const {
+        return index;
+    }
+
+    u32 GetElement() const {
+        return element;
+    }
+
+    Node GetBuffer() const { // Null when no buffer node was supplied at construction.
+        return buffer;
+    }
+
+private:
+    const Tegra::Shader::IpaMode input_mode;
+    const Node buffer;
+    const Tegra::Shader::Attribute::Index index;
+    const u32 element;
+};
+
+/// Constant buffer node, usually mapped to uniform buffers in GLSL
+class CbufNode final {
+public:
+    explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {}
+
+    u32 GetIndex() const {
+        return index;
+    }
+
+    Node GetOffset() const {
+        return offset;
+    }
+
+private:
+    const u32 index;
+    const Node offset; ///< The offset is itself a node rather than a plain integer
+};
+
+/// Local memory node; the accessed address is itself a node
+class LmemNode final {
+public:
+    explicit constexpr LmemNode(Node address) : address{address} {}
+
+    Node GetAddress() const {
+        return address;
+    }
+
+private:
+    const Node address;
+};
+
+/// Global memory node, made of a real address, a base address and a GlobalMemoryBase descriptor
+class GmemNode final {
+public:
+    explicit constexpr GmemNode(Node real_address, Node base_address,
+                                const GlobalMemoryBase& descriptor)
+        : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}
+
+    Node GetRealAddress() const {
+        return real_address;
+    }
+
+    Node GetBaseAddress() const {
+        return base_address;
+    }
+
+    const GlobalMemoryBase& GetDescriptor() const {
+        return descriptor;
+    }
+
+private:
+    const Node real_address;
+    const Node base_address;
+    const GlobalMemoryBase descriptor; ///< Stored by value (copied from the constructor argument)
+};
+
+/// Commentary text attached to the IR; can be dropped
+class CommentNode final {
+public:
+    explicit CommentNode(std::string text) : text{std::move(text)} {}
+
+    const std::string& GetText() const {
+        return text;
+    }
+
+private:
+    std::string text;
+};
+
+class ShaderIR final {
+public:
+    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
+        : program_code{program_code}, main_offset{main_offset} {
+
+        Decode();
+    }
+
+    const std::map<u32, NodeBlock>& GetBasicBlocks() const {
+        return basic_blocks;
+    }
+
+    const std::set<u32>& GetRegisters() const {
+        return used_registers;
+    }
+
+    const std::set<Tegra::Shader::Pred>& GetPredicates() const {
+        return used_predicates;
+    }
+
+    const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
+    GetInputAttributes() const {
+        return used_input_attributes;
+    }
+
+    const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
+        return used_output_attributes;
+    }
+
+    const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
+        return used_cbufs;
+    }
+
+    const std::set<Sampler>& GetSamplers() const {
+        return used_samplers;
+    }
+
+    const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
+        const {
+        return used_clip_distances;
+    }
+
+    const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
+        return used_global_memory_bases;
+    }
+
+    std::size_t GetLength() const {
+        return static_cast<std::size_t>(coverage_end * sizeof(u64));
+    }
+
+    const Tegra::Shader::Header& GetHeader() const {
+        return header;
+    }
+
+private:
+    void Decode();
+
+    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
+
+    NodeBlock DecodeRange(u32 begin, u32 end);
+
+    /**
+     * Decodes a single instruction from Tegra to IR.
+     * @param bb Basic block where the nodes will be written to.
+     * @param pc Program counter. Offset to decode.
+     * @return Next address to decode.
+     */
+    u32 DecodeInstr(NodeBlock& bb, u32 pc);
+
+    u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
+    u32 DecodeBfe(NodeBlock& bb, u32 pc);
+    u32 DecodeBfi(NodeBlock& bb, u32 pc);
+    u32 DecodeShift(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
+    u32 DecodeFfma(NodeBlock& bb, u32 pc);
+    u32 DecodeHfma2(NodeBlock& bb, u32 pc);
+    u32 DecodeConversion(NodeBlock& bb, u32 pc);
+    u32 DecodeMemory(NodeBlock& bb, u32 pc);
+    u32 DecodeTexture(NodeBlock& bb, u32 pc);
+    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
+    u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
+    u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
+    u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
+    u32 DecodeVideo(NodeBlock& bb, u32 pc);
+    u32 DecodeXmad(NodeBlock& bb, u32 pc);
+    u32 DecodeOther(NodeBlock& bb, u32 pc);
+
+    /// Internalizes node's data and returns a managed pointer to a clone of that node
+    Node StoreNode(NodeData&& node_data);
+
+    /// Creates a conditional node
+    Node Conditional(Node condition, std::vector<Node>&& code);
+    /// Creates a commentary
+    Node Comment(const std::string& text);
+    /// Creates an u32 immediate
+    Node Immediate(u32 value);
+    /// Creates a s32 immediate
+    Node Immediate(s32 value) {
+        return Immediate(static_cast<u32>(value));
+    }
+    /// Creates a f32 immediate
+    Node Immediate(f32 value) {
+        u32 integral;
+        std::memcpy(&integral, &value, sizeof(u32));
+        return Immediate(integral);
+    }
+
+    /// Generates a node for a passed register.
+    Node GetRegister(Tegra::Shader::Register reg);
+    /// Generates a node representing a 19-bit immediate value
+    Node GetImmediate19(Tegra::Shader::Instruction instr);
+    /// Generates a node representing a 32-bit immediate value
+    Node GetImmediate32(Tegra::Shader::Instruction instr);
+    /// Generates a node representing a constant buffer
+    Node GetConstBuffer(u64 index, u64 offset);
+    /// Generates a node representing a constant buffer with a variadic offset
+    Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
+    /// Generates a node for a passed predicate. It can be optionally negated
+    Node GetPredicate(u64 pred, bool negated = false);
+    /// Generates a predicate node for an immediate true or false value
+    Node GetPredicate(bool immediate);
+    /// Generates a node representing an input attribute. Keeps track of used attributes.
+    Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
+                           const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
+    /// Generates a node representing an output attribute. Keeps track of used attributes.
+    Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
+    /// Generates a node representing an internal flag
+    Node GetInternalFlag(InternalFlag flag, bool negated = false);
+    /// Generates a node representing a local memory address
+    Node GetLocalMemory(Node address);
+    /// Generates a temporal, internally it uses a post-RZ register
+    Node GetTemporal(u32 id);
+
+    /// Sets a register. src value must be a number-evaluated node.
+    void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
+    /// Sets a predicate. src value must be a bool-evaluated node
+    void SetPredicate(NodeBlock& bb, u64 dest, Node src);
+    /// Sets an internal flag. src value must be a bool-evaluated node
+    void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
+    /// Sets a local memory address. address and value must be a number-evaluated node
+    void SetLocalMemory(NodeBlock& bb, Node address, Node value);
+    /// Sets a temporal. Internally it uses a post-RZ register
+    void SetTemporal(NodeBlock& bb, u32 id, Node value);
+
+    /// Sets internal flags from a float
+    void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
+    /// Sets internal flags from an integer
+    void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
+
+    /// Conditionally absolute/negated float. Absolute is applied first
+    Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
+    /// Conditionally saturates a float
+    Node GetSaturatedFloat(Node value, bool saturate = true);
+
+    /// Converts an integer to different sizes.
+    Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
+    /// Conditionally absolute/negated integer. Absolute is applied first
+    Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
+
+    /// Unpacks a half immediate from an instruction
+    Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
+    /// Merges a half pair into another value
+    Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
+    /// Conditionally absolute/negated half float pair. Absolute is applied first
+    Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
+
+    /// Returns a predicate comparing two floats
+    Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
+    /// Returns a predicate comparing two integers
+    Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
+                                       Node op_a, Node op_b);
+    /// Returns a predicate comparing two half floats. meta defines how both pairs will be compared
+    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
+                                    const MetaHalfArithmetic& meta, Node op_a, Node op_b);
+
+    /// Returns a predicate combiner operation
+    OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
+
+    /// Returns a condition code evaluated from internal flags
+    Node GetConditionCode(Tegra::Shader::ConditionCode cc);
+
+    /// Accesses a texture sampler
+    const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
+                              Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+
+    /// Extracts a sequence of bits from a node
+    Node BitfieldExtract(Node value, u32 offset, u32 bits);
+
+    void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
+                                  const Node4& components);
+
+    void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
+                                   const Node4& components);
+    void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
+                                       const Node4& components);
+
+    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
+                     bool is_array, bool is_aoffi);
+
+    Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
+                      bool is_array);
+
+    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+                      bool depth_compare, bool is_array, bool is_aoffi);
+
+    Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+                      bool is_array);
+
+    std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
+        Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
+        bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
+
+    std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
+
+    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
+                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
+
+    Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
+                         u64 byte_height);
+
+    void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
+                             Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
+                             Tegra::Shader::PredicateResultMode predicate_mode,
+                             Tegra::Shader::Pred predicate, bool sets_cc);
+    void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
+                              Node op_c, Node imm_lut, bool sets_cc);
+
+    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+
+    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+
+    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
+
+    /// Builds an OperationNode from an operation code and a list of operand node pointers,
+    /// hands it to StoreNode and returns the node StoreNode gives back.
+    template <typename... T>
+    Node Operation(OperationCode code, const T*... operands) {
+        return StoreNode(OperationNode(code, operands...));
+    }
+
+    /// Same as the operand-only overload, but additionally moves the given metadata into the
+    /// OperationNode before it is handed to StoreNode.
+    template <typename... T>
+    Node Operation(OperationCode code, Meta&& meta, const T*... operands) {
+        return StoreNode(OperationNode(code, std::move(meta), operands...));
+    }
+
+    /// Builds an OperationNode whose operands are taken (moved) from a vector, hands it to
+    /// StoreNode and returns the resulting node.
+    /// No template parameters are needed here: every parameter type is fixed.
+    Node Operation(OperationCode code, std::vector<Node>&& operands) {
+        return StoreNode(OperationNode(code, std::move(operands)));
+    }
+
+    /// Builds an OperationNode from metadata plus a vector of operands (both moved in), hands it
+    /// to StoreNode and returns the resulting node.
+    /// No template parameters are needed here: every parameter type is fixed.
+    Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
+        return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
+    }
+
+    /// Like Operation, but the operation code is first mapped through
+    /// SignedToUnsignedCode(code, is_signed) before the OperationNode is built.
+    template <typename... T>
+    Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) {
+        return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...));
+    }
+
+    /// Metadata-carrying variant of SignedOperation: the code is mapped through
+    /// SignedToUnsignedCode(code, is_signed) and the metadata is moved into the OperationNode.
+    template <typename... T>
+    Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) {
+        return StoreNode(
+            OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...));
+    }
+
+    static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
+
+    const ProgramCode& program_code;
+    const u32 main_offset;
+
+    u32 coverage_begin{};
+    u32 coverage_end{};
+    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
+
+    std::map<u32, NodeBlock> basic_blocks;
+    NodeBlock global_code;
+
+    std::vector<std::unique_ptr<NodeData>> stored_nodes;
+
+    std::set<u32> used_registers;
+    std::set<Tegra::Shader::Pred> used_predicates;
+    std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
+        used_input_attributes;
+    std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
+    std::map<u32, ConstBuffer> used_cbufs;
+    std::set<Sampler> used_samplers;
+    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
+    std::set<GlobalMemoryBase> used_global_memory_bases;
+
+    Tegra::Shader::Header header;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
new file mode 100644
index 000000000..4505667ff
--- /dev/null
+++ b/src/video_core/shader/track.cpp
@@ -0,0 +1,102 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <utility>
+#include <variant>
+
+#include "common/common_types.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+namespace {
+/// Walks `code` backwards starting at index `cursor`, looking for an operation node whose code
+/// equals `operation_code`. Conditional nodes are searched recursively from their last entry; a
+/// match found inside one is reported together with the index of the conditional in `code`.
+/// Returns a value-initialized pair (null node) when nothing matches.
+std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
+                                   OperationCode operation_code) {
+    while (cursor >= 0) {
+        const Node current = code.at(cursor);
+        const auto* const operation = std::get_if<OperationNode>(current);
+        if (operation != nullptr && operation->GetCode() == operation_code) {
+            return {current, cursor};
+        }
+        if (const auto* const conditional = std::get_if<ConditionalNode>(current)) {
+            const auto& nested_code = conditional->GetCode();
+            const auto last_index = static_cast<s64>(nested_code.size() - 1);
+            const Node match = FindOperation(nested_code, last_index, operation_code).first;
+            if (match) {
+                return {match, cursor};
+            }
+        }
+        --cursor;
+    }
+    return {};
+}
+} // namespace
+
+/// Tries to resolve `tracked` to the constant buffer node it was loaded from.
+/// - A cbuf node is returned as-is when its offset is an immediate, otherwise tracking fails.
+/// - A register is followed to the value last assigned to it before `cursor` in `code`.
+/// - An operation is resolved through the first operand that tracks to a cbuf.
+/// Returns nullptr when no constant buffer could be found.
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
+    if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
+        // Cbuf found, but it has to be immediate
+        return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
+    }
+    if (const auto gpr = std::get_if<GprNode>(tracked)) {
+        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
+            return nullptr;
+        }
+        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+        // register that it uses as operand
+        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
+        if (!source) {
+            return nullptr;
+        }
+        return TrackCbuf(source, code, new_cursor);
+    }
+    if (const auto operation = std::get_if<OperationNode>(tracked)) {
+        for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
+            if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
+                // Cbuf found in operand
+                return found;
+            }
+        }
+        return nullptr;
+    }
+    // Every other node kind, ConditionalNode included, cannot be resolved to a cbuf. The
+    // previous ConditionalNode branch called TrackCbuf again with the very same `tracked` node,
+    // which re-entered that branch forever and overflowed the stack.
+    return nullptr;
+}
+
+/// Follows the register node `tracked` back to the value last assigned to it before `cursor`
+/// and returns that value when it is an immediate; otherwise returns an empty optional.
+/// `tracked` must hold a GprNode (std::get throws otherwise).
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+    const GprNode& gpr = std::get<GprNode>(*tracked);
+    // Start one entry earlier to avoid infinite loops when the instruction sets the same
+    // register that it uses as operand
+    const Node source = TrackRegister(&gpr, code, cursor - 1).first;
+    if (!source) {
+        return std::nullopt;
+    }
+    if (const auto* const immediate = std::get_if<ImmediateNode>(source)) {
+        return immediate->GetValue();
+    }
+    return std::nullopt;
+}
+
+/// Searches `code` backwards from `cursor` for the most recent Assign operation whose
+/// destination is the register `tracked`. Returns the assigned source node together with the
+/// index reported by FindOperation, or a value-initialized pair when no such assignment exists.
+std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
+                                             s64 cursor) {
+    while (cursor >= 0) {
+        const auto [found_node, found_cursor] =
+            FindOperation(code, cursor, OperationCode::Assign);
+        if (!found_node) {
+            return {};
+        }
+        const auto operation = std::get_if<OperationNode>(found_node);
+        ASSERT(operation);
+
+        const auto& target = (*operation)[0];
+        if (const auto gpr_target = std::get_if<GprNode>(target)) {
+            if (gpr_target->GetIndex() == tracked->GetIndex()) {
+                return {(*operation)[1], found_cursor};
+            }
+        }
+        // Nothing between found_cursor and cursor is an assignment, so resume right below the
+        // entry that was just inspected instead of re-scanning (and re-finding) it.
+        cursor = found_cursor - 1;
+    }
+    return {};
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1a344229f..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -50,6 +50,24 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
     }
 }
 
+/// Tells whether a surface target is one of the array targets (1D array, 2D array, cube array).
+/// Unknown targets are logged as critical and treated as unreachable.
+bool SurfaceTargetIsArray(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubeArray:
+        return true;
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::TextureCubemap:
+        return false;
+    default:
+        break;
+    }
+    LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+    UNREACHABLE();
+    return false;
+}
+
 PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
     switch (format) {
     case Tegra::DepthFormat::S8_Z24_UNORM:
@@ -71,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
 
 PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
     switch (format) {
-        // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
-        // gamma.
     case Tegra::RenderTargetFormat::RGBA8_SRGB:
         return PixelFormat::RGBA8_SRGB;
     case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -408,6 +424,8 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
     switch (format) {
     case Tegra::FramebufferConfig::PixelFormat::ABGR8:
         return PixelFormat::ABGR8U;
+    case Tegra::FramebufferConfig::PixelFormat::BGRA8:
+        return PixelFormat::BGRA8;
     default:
         LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
         UNREACHABLE();
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index c2259c3c2..b783e4b27 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -109,8 +109,7 @@ enum class SurfaceType {
     ColorTexture = 0,
     Depth = 1,
     DepthStencil = 2,
-    Fill = 3,
-    Invalid = 4,
+    Invalid = 3,
 };
 
 enum class SurfaceTarget {
@@ -441,6 +440,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
 
 bool SurfaceTargetIsLayered(SurfaceTarget target);
 
+bool SurfaceTargetIsArray(SurfaceTarget target);
+
 PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
 
 PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp
new file mode 100644
index 000000000..e96eba7cc
--- /dev/null
+++ b/src/video_core/texture_cache.cpp
@@ -0,0 +1,386 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::SurfaceTarget;
+
+using VideoCore::Surface::ComponentTypeFromDepthFormat;
+using VideoCore::Surface::ComponentTypeFromRenderTarget;
+using VideoCore::Surface::ComponentTypeFromTexture;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::PixelFormatFromTextureFormat;
+using VideoCore::Surface::SurfaceTargetFromTextureType;
+
+/// Returns `mip_size` unchanged when `uncompressed` is set; otherwise returns the number of
+/// `tile`-sized units needed to cover `mip_size` (rounded up, never less than 1).
+constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
+    if (uncompressed) {
+        return mip_size;
+    }
+    const u32 tiles = (mip_size + tile - 1) / tile;
+    return std::max(1U, tiles);
+}
+
+/// Builds the surface parameters described by a texture image control (TIC) entry.
+/// Block dimensions and tile width spacing are only taken from the TIC for tiled textures;
+/// pitch is only taken for linear ones. Cubemap targets store six faces per depth unit.
+SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
+                                              const Tegra::Texture::FullTextureInfo& config) {
+    // Value-initialize, as CreateForFermiCopySurface does, so no field starts indeterminate.
+    SurfaceParams params{};
+    params.is_tiled = config.tic.IsTiled();
+    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
+    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0;
+    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
+    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
+    params.pixel_format =
+        PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
+    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
+    params.type = GetFormatType(params.pixel_format);
+    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+    // Width and height are rounded up to a multiple of the format's compression factor
+    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
+    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    params.depth = config.tic.Depth();
+    if (params.target == SurfaceTarget::TextureCubemap ||
+        params.target == SurfaceTarget::TextureCubeArray) {
+        params.depth *= 6;
+    }
+    params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
+    params.unaligned_height = config.tic.Height();
+    params.num_levels = config.tic.max_mip_level + 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+/// Builds the surface parameters of a single-level 2D depth (zeta) buffer.
+/// The block dimension arguments are log2 values; they are clamped to 5 before being expanded.
+SurfaceParams SurfaceParams::CreateForDepthBuffer(
+    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+    u32 block_width, u32 block_height, u32 block_depth,
+    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+    // Value-initialize, as CreateForFermiCopySurface does, so no field starts indeterminate.
+    SurfaceParams params{};
+    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << std::min(block_width, 5U);
+    params.block_height = 1 << std::min(block_height, 5U);
+    params.block_depth = 1 << std::min(block_depth, 5U);
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromDepthFormat(format);
+    params.component_type = ComponentTypeFromDepthFormat(format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = zeta_width;
+    params.height = zeta_height;
+    params.unaligned_height = zeta_height;
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    // pitch takes part in operator== and Hash, so it must not be left unset.
+    params.pitch = 0;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+/// Builds the surface parameters of the render target at `index` from the Maxwell3D registers.
+/// For linear (non-tiled) targets the register's width field is used as the pitch and the
+/// width in pixels is derived from it.
+SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
+    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+    // Value-initialize, as CreateForFermiCopySurface does, so no field starts indeterminate.
+    SurfaceParams params{};
+    params.is_tiled =
+        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << config.memory_layout.block_width;
+    params.block_height = 1 << config.memory_layout.block_height;
+    params.block_depth = 1 << config.memory_layout.block_depth;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    if (params.is_tiled) {
+        // pitch takes part in operator== and Hash, so it must not be left unset.
+        params.pitch = 0;
+        params.width = config.width;
+    } else {
+        const u32 bytes_per_pixel = GetFormatBpp(params.pixel_format) / CHAR_BIT;
+        params.pitch = config.width;
+        params.width = params.pitch / bytes_per_pixel;
+    }
+    params.height = config.height;
+    params.depth = 1;
+    params.unaligned_height = config.height;
+    params.target = SurfaceTarget::Texture2D;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+/// Builds the surface parameters of a Fermi2D copy surface. The surface is always described as
+/// a single-level 2D texture; block dimensions are capped at 32 and only used when tiled.
+SurfaceParams SurfaceParams::CreateForFermiCopySurface(
+    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+    SurfaceParams params{};
+    params.is_tiled = !config.linear;
+    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0;
+    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0;
+    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = config.width;
+    params.height = config.height;
+    params.unaligned_height = config.height;
+    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+/// Width of the given mipmap level: the base width halved once per level, never below 1.
+u32 SurfaceParams::GetMipWidth(u32 level) const {
+    const u32 mip_width{width >> level};
+    return mip_width != 0 ? mip_width : 1U;
+}
+
+/// Height of the given mipmap level: the base height halved once per level, never below 1.
+u32 SurfaceParams::GetMipHeight(u32 level) const {
+    const u32 mip_height{height >> level};
+    return mip_height != 0 ? mip_height : 1U;
+}
+
+/// Depth of the given mipmap level. Layered targets keep their full depth at every level;
+/// other targets halve it once per level, never going below 1.
+u32 SurfaceParams::GetMipDepth(u32 level) const {
+    if (IsLayered()) {
+        return depth;
+    }
+    return std::max(1U, depth >> level);
+}
+
+/// True for the array targets and for cubemaps; false for every other target.
+bool SurfaceParams::IsLayered() const {
+    return target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
+           target == SurfaceTarget::TextureCubeArray || target == SurfaceTarget::TextureCubemap;
+}
+
+/// Block height used by the given mipmap level. Level 0 uses the configured block height;
+/// deeper levels start from 16 and halve it while the level is too short to need it.
+u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
+    // Auto block resizing algorithm from:
+    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+    if (level == 0) {
+        return block_height;
+    }
+    const u32 format_block_height{GetDefaultBlockHeight(pixel_format)};
+    const u32 mip_height{GetMipHeight(level)};
+    const u32 blocks_in_y{(mip_height + format_block_height - 1) / format_block_height};
+
+    u32 mip_block_height = 16;
+    while (mip_block_height > 1 && blocks_in_y <= mip_block_height * 4) {
+        mip_block_height /= 2;
+    }
+    return mip_block_height;
+}
+
+/// Block depth used by the given mipmap level. Level 0 uses the configured block depth and
+/// non-3D targets always use 1; for 3D targets the value starts at 32 and is halved while the
+/// level is too shallow to need it.
+u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
+    if (level == 0) {
+        return block_depth;
+    }
+    if (target != SurfaceTarget::Texture3D) {
+        return 1;
+    }
+
+    const u32 mip_depth{GetMipDepth(level)};
+    u32 mip_block_depth = 32;
+    while (mip_block_depth > 1 && mip_depth * 2 <= mip_block_depth) {
+        mip_block_depth /= 2;
+    }
+    // A block depth of 32 is reduced to 16 when the level's block height is 4 or more
+    const bool untouched{mip_block_depth == 32};
+    if (untouched && GetMipBlockHeight(level) >= 4) {
+        return 16;
+    }
+    return mip_block_depth;
+}
+
+/// Offset in guest memory of the given mipmap level: the sum of the guest sizes of all
+/// previous levels (a single layer each when the surface is layered).
+std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
+    const bool layer_only{IsLayered()};
+    std::size_t accumulated = 0;
+    for (u32 mip = 0; mip < level; ++mip) {
+        accumulated += GetInnerMipmapMemorySize(mip, false, layer_only, false);
+    }
+    return accumulated;
+}
+
+/// Offset in host memory of the given mipmap level: the sum of the host sizes of all
+/// previous levels.
+std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
+    std::size_t accumulated = 0;
+    for (u32 mip = 0; mip < level; ++mip) {
+        accumulated += GetInnerMipmapMemorySize(mip, true, false, false);
+    }
+    return accumulated;
+}
+
+/// Size in guest memory of one layer, counting all of its mipmap levels.
+std::size_t SurfaceParams::GetGuestLayerSize() const {
+    constexpr bool as_host_size = false;
+    constexpr bool layer_only = true;
+    constexpr bool uncompressed = false;
+    return GetInnerMemorySize(as_host_size, layer_only, uncompressed);
+}
+
+/// Host size of the given mipmap level; restricted to a single layer when the surface is
+/// layered.
+std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
+    constexpr bool as_host_size = true;
+    constexpr bool uncompressed = false;
+    return GetInnerMipmapMemorySize(level, as_host_size, IsLayered(), uncompressed);
+}
+
+/// Tells whether a surface described by `view_params` can be a view of this surface as far as
+/// tiling, format and target are concerned. Tiling, tile width spacing, pixel format,
+/// component type and surface type must match exactly; the targets must either be equal or the
+/// view target must be one this surface's target can be viewed as.
+bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
+    const auto format_key = [](const SurfaceParams& params) {
+        return std::tie(params.is_tiled, params.tile_width_spacing, params.pixel_format,
+                        params.component_type, params.type);
+    };
+    if (format_key(*this) != format_key(view_params)) {
+        return false;
+    }
+
+    const SurfaceTarget view_target{view_params.target};
+    if (target == view_target) {
+        return true;
+    }
+
+    const bool view_is_2d_family{view_target == SurfaceTarget::Texture2D ||
+                                 view_target == SurfaceTarget::Texture2DArray};
+    switch (target) {
+    case SurfaceTarget::Texture1DArray:
+        return view_target == SurfaceTarget::Texture1D;
+    case SurfaceTarget::Texture2DArray:
+        return view_target == SurfaceTarget::Texture2D;
+    case SurfaceTarget::TextureCubemap:
+        return view_is_2d_family;
+    case SurfaceTarget::TextureCubeArray:
+        return view_is_2d_family || view_target == SurfaceTarget::TextureCubemap;
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        // Non-layered targets only match themselves
+        return false;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
+        return false;
+    }
+}
+
+/// True when the pixel format lies in the range [MaxColorFormat, MaxDepthStencilFormat).
+bool SurfaceParams::IsPixelFormatZeta() const {
+    using VideoCore::Surface::PixelFormat;
+    if (pixel_format < PixelFormat::MaxColorFormat) {
+        return false;
+    }
+    return pixel_format < PixelFormat::MaxDepthStencilFormat;
+}
+
+/// Fills the values derived from the hashable parameters: guest size, host size and the
+/// number of layers. Must be called after every other field has been set.
+void SurfaceParams::CalculateCachedValues() {
+    guest_size_in_bytes = GetInnerMemorySize(false, false, false);
+
+    // ASTC is decompressed in software and emulated as RGBA8 (4 bytes per texel)
+    if (IsPixelFormatASTC(pixel_format)) {
+        // Widen before multiplying: in 32 bits the product wraps around for large surfaces
+        // (e.g. 16384 * 16384 * 1 * 4 == 2^32).
+        host_size_in_bytes = static_cast<std::size_t>(width) * height * depth * 4;
+    } else {
+        host_size_in_bytes = GetInnerMemorySize(true, false, false);
+    }
+
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        num_layers = 1;
+        break;
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        // Layered targets store their layer count in depth
+        num_layers = depth;
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+
+/// Size in bytes of one mipmap level.
+/// @param level         Mipmap level to measure.
+/// @param as_host_size  When set the level is measured as linear (never tiled).
+/// @param layer_only    When set a depth of 1 is used instead of the level's depth.
+/// @param uncompressed  When set the level's dimensions are used as-is instead of being
+///                      divided by the format's default block dimensions.
+std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
+                                                    bool uncompressed) const {
+    const bool tiled{!as_host_size && is_tiled};
+    const u32 format_block_width{GetDefaultBlockWidth(pixel_format)};
+    const u32 format_block_height{GetDefaultBlockHeight(pixel_format)};
+    const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), format_block_width)};
+    const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), format_block_height)};
+    const u32 mip_depth{layer_only ? 1U : GetMipDepth(level)};
+    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), mip_width,
+                                         mip_height, mip_depth, GetMipBlockHeight(level),
+                                         GetMipBlockDepth(level));
+}
+
+/// Total size in bytes of all mipmap levels. Guest sizes of tiled surfaces are rounded up to a
+/// multiple of GOB size * block height * block depth.
+std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
+                                              bool uncompressed) const {
+    std::size_t total = 0;
+    for (u32 mip = 0; mip < num_levels; ++mip) {
+        total += GetInnerMipmapMemorySize(mip, as_host_size, layer_only, uncompressed);
+    }
+    const bool needs_alignment{is_tiled && !as_host_size};
+    if (needs_alignment) {
+        total = Common::AlignUp(total, Tegra::Texture::GetGOBSize() * block_height * block_depth);
+    }
+    return total;
+}
+
+/// Builds a map from guest memory offsets to the (layer, level) pair that starts there.
+/// Non-layered targets get one entry per mipmap level (layer 0); layered targets get one entry
+/// per layer and level. An existing offset is never overwritten by a later entry.
+std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
+    std::map<u64, std::pair<u32, u32>> offsets;
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D: {
+        constexpr u32 layer = 0;
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
+            offsets.emplace(level_offset, std::pair<u32, u32>{layer, level});
+        }
+        break;
+    }
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray: {
+        const std::size_t layer_stride{GetGuestLayerSize()};
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
+            for (u32 layer = 0; layer < num_layers; ++layer) {
+                const std::size_t view_offset{level_offset + layer_stride * layer};
+                offsets.emplace(view_offset, std::pair<u32, u32>{layer, level});
+            }
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
+    }
+    return offsets;
+}
+
+/// A view is valid when its dimensions and depth match the given level of this surface and
+/// its layer/level range fits inside this surface.
+bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    if (!IsDimensionValid(view_params, level)) {
+        return false;
+    }
+    if (!IsDepthValid(view_params, level)) {
+        return false;
+    }
+    return IsInBounds(view_params, layer, level);
+}
+
+/// True when the view's width and height equal this surface's dimensions at `level`.
+bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
+    if (view_params.width != GetMipWidth(level)) {
+        return false;
+    }
+    return view_params.height == GetMipHeight(level);
+}
+
+/// Depth only matters for 3D views: those must match this surface's depth at `level`.
+bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
+    return view_params.target != SurfaceTarget::Texture3D ||
+           view_params.depth == GetMipDepth(level);
+}
+
+/// True when the view's layers starting at `layer` and its levels starting at `level` all
+/// exist in this surface.
+bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    if (layer + view_params.num_layers > num_layers) {
+        return false;
+    }
+    return level + view_params.num_levels <= num_levels;
+}
+
+/// Hashes the raw bytes of this object (sizeof(HasheableSurfaceParams) bytes) with CityHash64.
+std::size_t HasheableSurfaceParams::Hash() const {
+    const auto* const raw_bytes = reinterpret_cast<const char*>(this);
+    const auto digest = Common::CityHash64(raw_bytes, sizeof(*this));
+    return static_cast<std::size_t>(digest);
+}
+
+/// Member-wise equality over every hashable field.
+bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
+    const auto fields = [](const HasheableSurfaceParams& params) {
+        return std::tie(params.is_tiled, params.block_width, params.block_height,
+                        params.block_depth, params.tile_width_spacing, params.width,
+                        params.height, params.depth, params.pitch, params.unaligned_height,
+                        params.num_levels, params.pixel_format, params.component_type,
+                        params.type, params.target);
+    };
+    return fields(*this) == fields(rhs);
+}
+
+/// Hashes the raw bytes of this key (sizeof(ViewKey) bytes) with CityHash64.
+std::size_t ViewKey::Hash() const {
+    const auto* const raw_bytes = reinterpret_cast<const char*>(this);
+    const auto digest = Common::CityHash64(raw_bytes, sizeof(*this));
+    return static_cast<std::size_t>(digest);
+}
+
+/// Two keys are equal when base layer, layer count, base level and level count all match.
+bool ViewKey::operator==(const ViewKey& rhs) const {
+    const auto fields = [](const ViewKey& key) {
+        return std::tie(key.base_layer, key.num_layers, key.base_level, key.num_levels);
+    };
+    return fields(*this) == fields(rhs);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h
new file mode 100644
index 000000000..041551691
--- /dev/null
+++ b/src/video_core/texture_cache.h
@@ -0,0 +1,586 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <list>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+
+#include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra::Texture {
+struct FullTextureInfo;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace VideoCommon {
+
+class HasheableSurfaceParams { // Fields of a surface that take part in hashing and equality.
+public:
+    std::size_t Hash() const; ///< Hash computed over the raw bytes of this object.
+    /// Returns true when every field listed below is equal in both objects.
+    bool operator==(const HasheableSurfaceParams& rhs) const;
+
+protected:
+    // Avoid creation outside of a managed environment.
+    HasheableSurfaceParams() = default;
+    // NOTE(review): no member initializers and Hash() reads raw bytes - confirm padding is safe.
+    bool is_tiled;
+    u32 block_width;
+    u32 block_height;
+    u32 block_depth;
+    u32 tile_width_spacing;
+    u32 width;
+    u32 height;
+    u32 depth;
+    u32 pitch;
+    u32 unaligned_height;
+    u32 num_levels;
+    VideoCore::Surface::PixelFormat pixel_format;
+    VideoCore::Surface::ComponentType component_type;
+    VideoCore::Surface::SurfaceType type;
+    VideoCore::Surface::SurfaceTarget target;
+};
+
+class SurfaceParams final : public HasheableSurfaceParams {
+public:
+    /// Creates SurfaceParams from a texture configuration.
+    static SurfaceParams CreateForTexture(Core::System& system,
+                                          const Tegra::Texture::FullTextureInfo& config);
+
+    /// Creates SurfaceParams for a depth buffer configuration.
+    static SurfaceParams CreateForDepthBuffer(
+        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+        u32 block_width, u32 block_height, u32 block_depth,
+        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+
+    /// Creates SurfaceParams from a framebuffer configuration.
+    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+
+    /// Creates SurfaceParams from a Fermi2D surface configuration.
+    static SurfaceParams CreateForFermiCopySurface(
+        const Tegra::Engines::Fermi2D::Regs::Surface& config);
+
+    bool IsTiled() const {
+        return is_tiled;
+    }
+
+    u32 GetBlockWidth() const {
+        return block_width;
+    }
+
+    u32 GetTileWidthSpacing() const {
+        return tile_width_spacing;
+    }
+
+    u32 GetWidth() const {
+        return width;
+    }
+
+    u32 GetHeight() const {
+        return height;
+    }
+
+    u32 GetDepth() const {
+        return depth;
+    }
+
+    u32 GetPitch() const {
+        return pitch;
+    }
+
+    u32 GetNumLevels() const {
+        return num_levels;
+    }
+
+    VideoCore::Surface::PixelFormat GetPixelFormat() const {
+        return pixel_format;
+    }
+
+    VideoCore::Surface::ComponentType GetComponentType() const {
+        return component_type;
+    }
+
+    VideoCore::Surface::SurfaceTarget GetTarget() const {
+        return target;
+    }
+
+    VideoCore::Surface::SurfaceType GetType() const {
+        return type;
+    }
+
+    std::size_t GetGuestSizeInBytes() const {
+        return guest_size_in_bytes;
+    }
+
+    std::size_t GetHostSizeInBytes() const {
+        return host_size_in_bytes;
+    }
+
+    u32 GetNumLayers() const {
+        return num_layers;
+    }
+
+    /// Returns the width of a given mipmap level.
+    u32 GetMipWidth(u32 level) const;
+
+    /// Returns the height of a given mipmap level.
+    u32 GetMipHeight(u32 level) const;
+
+    /// Returns the depth of a given mipmap level.
+    u32 GetMipDepth(u32 level) const;
+
+    /// Returns true if these parameters are from a layered surface.
+    bool IsLayered() const;
+
+    /// Returns the block height of a given mipmap level.
+    u32 GetMipBlockHeight(u32 level) const;
+
+    /// Returns the block depth of a given mipmap level.
+    u32 GetMipBlockDepth(u32 level) const;
+
+    /// Returns the offset in bytes in guest memory of a given mipmap level.
+    std::size_t GetGuestMipmapLevelOffset(u32 level) const;
+
+    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
+    std::size_t GetHostMipmapLevelOffset(u32 level) const;
+
+    /// Returns the size of a layer in bytes in guest memory.
+    std::size_t GetGuestLayerSize() const;
+
+    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
+    std::size_t GetHostLayerSize(u32 level) const;
+
+    /// Returns true if another surface can be familiar with this. This is a loosely defined term
+    /// that reflects the possibility of these two surface parameters potentially being part of a
+    /// bigger superset.
+    bool IsFamiliar(const SurfaceParams& view_params) const;
+
+    /// Returns true if the pixel format is a depth and/or stencil format.
+    bool IsPixelFormatZeta() const;
+    // NOTE(review): std::map is used below but <map> is not among this header's own includes.
+    /// Creates a map that redirects an address difference to a layer and mipmap level.
+    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;
+
+    /// Returns true if the passed surface view parameters are equal to or a valid subset of this.
+    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+private:
+    /// Calculates values that can be deduced from HasheableSurfaceParams.
+    void CalculateCachedValues();
+
+    /// Returns the size of a given mipmap level.
+    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
+                                         bool uncompressed) const;
+
+    /// Returns the size of all mipmap levels and aligns as needed.
+    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;
+
+    /// Returns true if the passed view width and height match the size of this params in a given
+    /// mipmap level.
+    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view depth matches the depth of this params in a given level.
+    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view layers and mipmap levels are in bounds.
+    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+    std::size_t guest_size_in_bytes;
+    std::size_t host_size_in_bytes;
+    u32 num_layers;
+};
+
+struct ViewKey { // Key identifying a view by its layer range and mipmap level range.
+    std::size_t Hash() const; ///< Hash computed over the raw bytes of this key.
+    /// Returns true when all four fields are equal.
+    bool operator==(const ViewKey& rhs) const;
+
+    u32 base_layer{};
+    u32 num_layers{};
+    u32 base_level{};
+    u32 num_levels{};
+};
+
+} // namespace VideoCommon
+
+namespace std {
+// Hash adapters that forward to the Hash() member of each cache key type.
+template <>
+struct hash<VideoCommon::SurfaceParams> {
+    std::size_t operator()(const VideoCommon::SurfaceParams& params) const noexcept {
+        return params.Hash();
+    }
+};
+
+template <>
+struct hash<VideoCommon::ViewKey> {
+    std::size_t operator()(const VideoCommon::ViewKey& view_key) const noexcept {
+        return view_key.Hash();
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon {
+
+template <typename TView, typename TExecutionContext>
+class SurfaceBase {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+
+public:
+    virtual void LoadBuffer() = 0;
+    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;
+    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;
+
+    /// Looks up the view of this surface described by view_addr and view_params.
+    /// Returns nullptr when the request cannot be served as a view of this surface.
+    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
+        if (view_addr < cpu_addr) {
+            // It can't be a view if it's in a prior address.
+            return nullptr;
+        }
+        if (!params.IsFamiliar(view_params)) {
+            return nullptr;
+        }
+        const auto entry{view_offset_map.find(static_cast<u64>(view_addr - cpu_addr))};
+        if (entry == view_offset_map.end()) {
+            // Couldn't find an aligned view.
+            return nullptr;
+        }
+        const u32 layer{entry->second.first};
+        const u32 level{entry->second.second};
+        if (!params.IsViewValid(view_params, layer, level)) {
+            return nullptr;
+        }
+        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
+    }
+
+    VAddr GetCpuAddr() const {
+        ASSERT(is_registered);
+        return cpu_addr;
+    }
+
+    u8* GetHostPtr() const {
+        ASSERT(is_registered);
+        return host_ptr;
+    }
+
+    CacheAddr GetCacheAddr() const {
+        ASSERT(is_registered);
+        return cache_addr;
+    }
+
+    std::size_t GetSizeInBytes() const {
+        return params.GetGuestSizeInBytes();
+    }
+
+    void MarkAsModified(bool is_modified_) {
+        is_modified = is_modified_;
+    }
+
+    const SurfaceParams& GetSurfaceParams() const {
+        return params;
+    }
+    /// Same lookup as TryGetView, but asserts that a view was found.
+    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
+        TView* const found_view{TryGetView(view_addr, view_params)};
+        ASSERT(found_view != nullptr);
+        return found_view;
+    }
+    /// Records the guest address, host pointer and cache address and marks this as registered.
+    void Register(VAddr cpu_addr_, u8* host_ptr_) {
+        ASSERT(!is_registered);
+        cpu_addr = cpu_addr_;
+        host_ptr = host_ptr_;
+        cache_addr = ToCacheAddr(host_ptr_);
+        is_registered = true;
+    }
+
+    void Register(VAddr cpu_addr_) {
+        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
+    }
+
+    void Unregister() {
+        ASSERT(is_registered);
+        is_registered = false;
+    }
+
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+protected:
+    explicit SurfaceBase(const SurfaceParams& params)
+        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}
+
+    ~SurfaceBase() = default;
+
+    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;
+
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    const SurfaceParams params;
+
+private:
+    /// Returns the cached view for the given layer/level range, creating it on first use.
+    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
+        const ViewKey key{base_layer, num_layers, base_level, num_levels};
+        const auto insertion{views.try_emplace(key)};
+        std::unique_ptr<TView>& slot{insertion.first->second};
+        if (insertion.second) {
+            slot = CreateView(key);
+        }
+        return slot.get();
+    }
+
+    const std::map<u64, std::pair<u32, u32>> view_offset_map;
+    VAddr cpu_addr{};
+    u8* host_ptr{};
+    CacheAddr cache_addr{};
+    bool is_modified{};
+    bool is_registered{};
+    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
+};
+
+template <typename TSurface, typename TView, typename TExecutionContext>
+class TextureCache {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+    using ResultType = std::tuple<TView*, TExecutionContext>;
+    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
+    using IntervalType = typename IntervalMap::interval_type;
+public:
+    /// Unregisters the registered surfaces found in the given cache address range.
+    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
+            if (!surface->IsRegistered()) {
+                // Skip duplicates
+                continue;
+            }
+            Unregister(surface);
+        }
+    }
+    /// Returns a view for a texture config, or a null view if its address can't be translated.
+    ResultType GetTextureSurface(TExecutionContext exctx,
+                                 const Tegra::Texture::FullTextureInfo& config) {
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+        const auto params{SurfaceParams::CreateForTexture(system, config)};
+        return GetSurfaceView(exctx, *cpu_addr, params, true);
+    }
+    /// Returns a view for the depth buffer, or a null view if it is disabled or has no address.
+    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
+        const auto& regs{system.GPU().Maxwell3D().regs};
+        if (!regs.zeta.Address() || !regs.zeta_enable) {
+            return {{}, exctx};
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+
+        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
+            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
+            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
+            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
+        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
+    }
+    /// Returns a view for color buffer `index`, or a null view if that target is not usable.
+    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
+                                     bool preserve_contents) {
+        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
+
+        const auto& regs{system.GPU().Maxwell3D().regs};
+        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
+            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+            return {{}, exctx};
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(
+            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+
+        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+                              preserve_contents);
+    }
+    /// Returns a view for a Fermi2D surface; asserts that its GPU address can be translated.
+    ResultType GetFermiSurface(TExecutionContext exctx,
+                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
+        ASSERT(cpu_addr);
+        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
+                              true);
+    }
+    /// Returns a surface registered at host_ptr's cache address, or nullptr if there is none.
+    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
+        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
+        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
+    }
+
+protected:
+    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+        : system{system}, rasterizer{rasterizer} {}
+
+    ~TextureCache() = default;
+    /// Hook tried by GetSurfaceView when overlaps exist; a non-null view is returned as-is.
+    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
+                                             const SurfaceParams& params, bool preserve_contents,
+                                             const std::vector<TSurface*>& overlaps) = 0;
+    /// Creates a new surface; called by GetUncachedSurface when no reserved one is free.
+    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;
+    /// Registers the surface, adds it to the interval map and raises the cached page count.
+    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
+        surface->Register(cpu_addr, host_ptr);
+        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
+        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
+    }
+    /// Reverses Register: removes the surface from the map and lowers the cached page count.
+    void Unregister(TSurface* surface) {
+        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
+        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
+        surface->Unregister();
+    }
+    /// Returns an unregistered surface for params, reusing a reserved one when possible.
+    TSurface* GetUncachedSurface(const SurfaceParams& params) {
+        if (TSurface* surface = TryGetReservedSurface(params); surface)
+            return surface;
+        // No reserved surface available, create a new one and reserve it
+        auto new_surface{CreateSurface(params)};
+        TSurface* surface{new_surface.get()};
+        ReserveSurface(params, std::move(new_surface));
+        return surface;
+    }
+
+    Core::System& system;
+private:
+    /// Finds or creates the surface view that covers cpu_addr with the given parameters.
+    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
+                              bool preserve_contents) {
+        const auto host_ptr{Memory::GetPointer(cpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
+        if (overlaps.empty()) {
+            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
+        }
+        if (overlaps.size() == 1) {
+            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
+                return {view, exctx};
+        }
+
+        TView* fast_view;
+        std::tie(fast_view, exctx) =
+            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);
+        for (TSurface* surface : overlaps) {
+            if (!surface->IsRegistered()) {
+                // Skip duplicates: GetSurfacesInRegion may list the same surface more than once.
+                continue;
+            }
+            if (!fast_view) {
+                // Flush even when we don't care about the contents, to preserve memory not written
+                // by the new surface.
+                exctx = surface->FlushBuffer(exctx);
+            }
+            Unregister(surface);
+        }
+        if (fast_view) {
+            return {fast_view, exctx};
+        }
+        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
+    }
+    /// Registers a surface at cpu_addr, loads it if preserve_contents, and returns its view.
+    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
+                               const SurfaceParams& params, bool preserve_contents) {
+        TSurface* new_surface{GetUncachedSurface(params)};
+        Register(new_surface, cpu_addr, host_ptr);
+        if (preserve_contents) {
+            exctx = LoadSurface(exctx, new_surface);
+        }
+        return {new_surface->GetView(cpu_addr, params), exctx};
+    }
+    /// Calls LoadBuffer and UploadTexture on the surface, then clears its modified flag.
+    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
+        surface->LoadBuffer();
+        exctx = surface->UploadTexture(exctx);
+        surface->MarkAsModified(false);
+        return exctx;
+    }
+    /// Returns one surface per interval-map segment overlapping the range; may hold duplicates.
+    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
+        if (size == 0) {
+            return {};
+        }
+        const IntervalType interval{cache_addr, cache_addr + size};
+
+        std::vector<TSurface*> surfaces;
+        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
+            surfaces.push_back(*pair.second.begin());
+        }
+        return surfaces;
+    }
+    /// Stores the surface in the reserve under its parameters, taking ownership of it.
+    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
+        surface_reserve[params].push_back(std::move(surface));
+    }
+    /// Returns a reserved surface with these parameters that is not registered, or nullptr.
+    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
+        auto search{surface_reserve.find(params)};
+        if (search == surface_reserve.end()) {
+            return {};
+        }
+        for (auto& surface : search->second) {
+            if (!surface->IsRegistered()) {
+                return surface.get();
+            }
+        }
+        return {};
+    }
+    /// Returns the right-open cache address interval covered by the surface.
+    IntervalType GetSurfaceInterval(TSurface* surface) const {
+        return IntervalType::right_open(surface->GetCacheAddr(),
+                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+
+    IntervalMap registered_surfaces;
+
+    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
+    /// previously been used. This is to prevent surfaces from being constantly created and
+    /// destroyed when used with different surface parameters.
+    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
 
 #include "video_core/textures/astc.h"
 
-class BitStream {
+class InputBitStream {
 public:
-    explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
         : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
 
-    ~BitStream() = default;
-
-    int GetBitsWritten() const {
-        return m_BitsWritten;
-    }
-
-    void WriteBitsR(unsigned int val, unsigned int nBits) {
-        for (unsigned int i = 0; i < nBits; i++) {
-            WriteBit((val >> (nBits - i - 1)) & 1);
-        }
-    }
-
-    void WriteBits(unsigned int val, unsigned int nBits) {
-        for (unsigned int i = 0; i < nBits; i++) {
-            WriteBit((val >> i) & 1);
-        }
-    }
+    ~InputBitStream() = default;
 
     int GetBitsRead() const {
         return m_BitsRead;
@@ -71,6 +55,38 @@ public:
     }
 
 private:
+    const int m_NumBits;
+    const unsigned char* m_CurByte;
+    int m_NextBit = 0;
+    int m_BitsRead = 0;
+
+    bool done = false;
+};
+
+class OutputBitStream {
+public:
+    explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+
+    ~OutputBitStream() = default;
+
+    int GetBitsWritten() const {
+        return m_BitsWritten;
+    }
+
+    void WriteBitsR(unsigned int val, unsigned int nBits) {
+        for (unsigned int i = 0; i < nBits; i++) {
+            WriteBit((val >> (nBits - i - 1)) & 1);
+        }
+    }
+
+    void WriteBits(unsigned int val, unsigned int nBits) {
+        for (unsigned int i = 0; i < nBits; i++) {
+            WriteBit((val >> i) & 1);
+        }
+    }
+
+private:
     void WriteBit(int b) {
 
         if (done)
@@ -238,8 +254,8 @@ public:
     // Fills result with the values that are encoded in the given
     // bitstream. We must know beforehand what the maximum possible
     // value is, and how many values we're decoding.
-    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
-                                      uint32_t maxRange, uint32_t nValues) {
+    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
+                                      InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
         // Determine encoding parameters
         IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
 
@@ -267,7 +283,7 @@ public:
     }
 
 private:
-    static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+    static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
                                 uint32_t nBitsPerValue) {
         // Implement the algorithm in section C.2.12
         uint32_t m[5];
@@ -327,7 +343,7 @@ private:
         }
     }
 
-    static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+    static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
                                  uint32_t nBitsPerValue) {
         // Implement the algorithm in section C.2.12
         uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
     }
 };
 
-static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
+static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
     TexelWeightParams params;
 
     // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
     return params;
 }
 
-static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
                               uint32_t blockHeight) {
     // Don't actually care about the void extent, just read the bits...
     for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
 
     // We now have enough to decode our integer sequence.
     std::vector<IntegerEncodedValue> decodedColorValues;
-    BitStream colorStream(data);
+    InputBitStream colorStream(data);
     IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
 
     // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
 #undef READ_INT_VALUES
 }
 
-static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
+static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
                             const uint32_t blockHeight, uint32_t* outBuf) {
-    BitStream strm(inBuf);
+    InputBitStream strm(inBuf);
     TexelWeightParams weightParams = DecodeBlockInfo(strm);
 
     // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
     // Define color data.
     uint8_t colorEndpointData[16];
     memset(colorEndpointData, 0, sizeof(colorEndpointData));
-    BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
+    OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
 
     // Read extra config data...
     uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
     memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
 
     std::vector<IntegerEncodedValue> texelWeightValues;
-    BitStream weightStream(texelWeightData);
+    InputBitStream weightStream(texelWeightData);
 
     IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
                                                weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
 
 namespace Tegra::Texture::ASTC {
 
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                 uint32_t depth, uint32_t block_width, uint32_t block_height) {
     uint32_t blockIdx = 0;
     std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
         for (uint32_t j = 0; j < height; j += block_height) {
             for (uint32_t i = 0; i < width; i += block_width) {
 
-                uint8_t* blockPtr = data.data() + blockIdx * 16;
+                const uint8_t* blockPtr = data + blockIdx * 16;
 
                 // Blocks can be at most 12x12
                 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
 
 namespace Tegra::Texture::ASTC {
 
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                 uint32_t depth, uint32_t block_width, uint32_t block_height);
 
 } // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..82050bd51
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,93 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <tuple>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/surface.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
+
+namespace Tegra::Texture {
+
+using VideoCore::Surface::PixelFormat;
+
+template <bool reverse>
+void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+    union S8Z24 {
+        BitField<0, 24, u32> z24;
+        BitField<24, 8, u32> s8;
+    };
+    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+
+    union Z24S8 {
+        BitField<0, 8, u32> s8;
+        BitField<8, 24, u32> z24;
+    };
+    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+
+    // In-place repack of width * height texels: S8Z24 -> Z24S8, or the opposite when reverse.
+    constexpr auto bpp{
+        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+    const std::size_t texel_count{static_cast<std::size_t>(width) * height};
+    S8Z24 s8z24_texel{};
+    Z24S8 z24s8_texel{};
+    for (std::size_t index = 0; index < texel_count; ++index) {
+        u8* const texel{data + bpp * index};
+        if constexpr (reverse) {
+            std::memcpy(&z24s8_texel, texel, sizeof(Z24S8));
+            s8z24_texel.s8.Assign(z24s8_texel.s8);
+            s8z24_texel.z24.Assign(z24s8_texel.z24);
+            std::memcpy(texel, &s8z24_texel, sizeof(S8Z24));
+        } else {
+            std::memcpy(&s8z24_texel, texel, sizeof(S8Z24));
+            z24s8_texel.s8.Assign(s8z24_texel.s8);
+            z24s8_texel.z24.Assign(s8z24_texel.z24);
+            std::memcpy(texel, &z24s8_texel, sizeof(Z24S8));
+        }
+    }
+}
+
+static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+    SwapS8Z24ToZ24S8<false>(data, width, height); // reverse == false: S8Z24 in, Z24S8 out.
+}
+
+static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
+    SwapS8Z24ToZ24S8<true>(data, width, height); // reverse == true: Z24S8 in, S8Z24 out.
+}
+
+void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+                            bool convert_astc, bool convert_s8z24) {
+    if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+        u32 block_width{};
+        u32 block_height{};
+        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+        const auto rgba8{ASTC::Decompress(data, width, height, depth, block_width, block_height)};
+        std::copy(rgba8.begin(), rgba8.end(), data);
+        return;
+    }
+    if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+        ConvertS8Z24ToZ24S8(data, width, height);
+    }
+}
+
+void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+                            bool convert_astc, bool convert_s8z24) {
+    const bool wants_astc{convert_astc && IsPixelFormatASTC(pixel_format)};
+    if (wants_astc) {
+        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
+                     static_cast<u32>(pixel_format));
+        UNREACHABLE();
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+        ConvertZ24S8ToS8Z24(data, width, height);
+    }
+}
+
+} // namespace Tegra::Texture
+\ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..12542e71c
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,21 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace VideoCore::Surface {
+enum class PixelFormat;
+}
+
+namespace Tegra::Texture {
+/// Converts texture data in place from the guest layout to the one used on the host.
+void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+/// Converts texture data in place from the host layout back to the guest layout.
+void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+
+} // namespace Tegra::Texture
+\ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"
@@ -103,8 +102,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
                 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
                 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
                 const u32 pixel_index{out_x + pixel_base};
-                data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
-                data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
+                data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
+                data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
                 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
             }
             pixel_base += stride_x;
@@ -154,7 +153,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
             for (u32 xb = 0; xb < blocks_on_x; xb++) {
                 const u32 x_start = xb * block_x_elements;
                 const u32 x_end = std::min(width, x_start + block_x_elements);
-                if (fast) {
+                if constexpr (fast) {
                     FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
                                      z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
                                      layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
     }
 }
 
-void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                       u32 block_depth, u32 width_spacing) {
     CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
                      (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
-                     bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
-                     block_height, block_depth, width_spacing);
+                     bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
+                     width_spacing);
 }
 
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
-                                 u32 block_height, u32 block_depth, u32 width_spacing) {
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth, u32 block_height,
+                                 u32 block_depth, u32 width_spacing) {
     std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
     UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
                      width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
 }
 
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height) {
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
     const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
                                   gob_size_x};
     for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
             const u32 gob_address =
                 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
             const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
-            const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
-            const VAddr dest_addr = swizzled_data + swizzled_offset;
+            u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
+            u8* dest_addr = swizzled_data + swizzled_offset;
 
-            Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel);
+            std::memcpy(dest_addr, source_line, bytes_per_pixel);
         }
     }
 }
 
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y) {
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y) {
     for (u32 line = 0; line < subrect_height; ++line) {
         const u32 y2 = line + offset_y;
         const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
             const u32 x2 = (x + offset_x) * bytes_per_pixel;
             const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
             const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
-            const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
-            const VAddr source_addr = swizzled_data + swizzled_offset;
+            u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
+            u8* source_addr = swizzled_data + swizzled_offset;
 
-            Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel);
+            std::memcpy(dest_line, source_addr, bytes_per_pixel);
         }
     }
 }
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,18 +16,15 @@ inline std::size_t GetGOBSize() {
     return 512;
 }
 
-/**
- * Unswizzles a swizzled texture without changing its format.
- */
-void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+/// Unswizzles a swizzled texture without changing its format.
+void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                       u32 block_height = TICEntry::DefaultBlockHeight,
                       u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-/**
- * Unswizzles a swizzled texture without changing its format.
- */
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+
+/// Unswizzles a swizzled texture without changing its format.
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth,
                                  u32 block_height = TICEntry::DefaultBlockHeight,
                                  u32 block_depth = TICEntry::DefaultBlockHeight,
                                  u32 width_spacing = 0);
@@ -37,25 +34,21 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
                       u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
                       bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
 
-/**
- * Decodes an unswizzled texture into a A8R8G8B8 texture.
- */
+/// Decodes an unswizzled texture into an A8R8G8B8 texture.
 std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
                               u32 height);
 
-/**
- * This function calculates the correct size of a texture depending if it's tiled or not.
- */
+/// Calculates the correct size of a texture depending on whether it's tiled or not.
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                           u32 block_height, u32 block_depth);
 
 /// Copies an untiled subrectangle into a tiled surface.
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height);
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
+
 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y);
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y);
 
 } // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e7c78bee2..bea0d5bc2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,11 +4,10 @@
 
 #pragma once
 
+#include <array>
 #include "common/assert.h"
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra::Texture {
 
@@ -182,7 +181,7 @@ struct TICEntry {
     };
     union {
         BitField<0, 16, u32> height_minus_1;
-        BitField<16, 15, u32> depth_minus_1;
+        BitField<16, 14, u32> depth_minus_1;
     };
     union {
         BitField<6, 13, u32> mip_lod_bias;
@@ -282,34 +281,62 @@ enum class TextureMipmapFilter : u32 {
 
 struct TSCEntry {
     union {
-        BitField<0, 3, WrapMode> wrap_u;
-        BitField<3, 3, WrapMode> wrap_v;
-        BitField<6, 3, WrapMode> wrap_p;
-        BitField<9, 1, u32> depth_compare_enabled;
-        BitField<10, 3, DepthCompareFunc> depth_compare_func;
-        BitField<13, 1, u32> srgb_conversion;
-        BitField<20, 3, u32> max_anisotropy;
+        struct {
+            union {
+                BitField<0, 3, WrapMode> wrap_u;
+                BitField<3, 3, WrapMode> wrap_v;
+                BitField<6, 3, WrapMode> wrap_p;
+                BitField<9, 1, u32> depth_compare_enabled;
+                BitField<10, 3, DepthCompareFunc> depth_compare_func;
+                BitField<13, 1, u32> srgb_conversion;
+                BitField<20, 3, u32> max_anisotropy;
+            };
+            union {
+                BitField<0, 2, TextureFilter> mag_filter;
+                BitField<4, 2, TextureFilter> min_filter;
+                BitField<6, 2, TextureMipmapFilter> mipmap_filter;
+                BitField<9, 1, u32> cubemap_interface_filtering;
+                BitField<12, 13, u32> mip_lod_bias;
+            };
+            union {
+                BitField<0, 12, u32> min_lod_clamp;
+                BitField<12, 12, u32> max_lod_clamp;
+                BitField<24, 8, u32> srgb_border_color_r;
+            };
+            union {
+                BitField<12, 8, u32> srgb_border_color_g;
+                BitField<20, 8, u32> srgb_border_color_b;
+            };
+            std::array<f32, 4> border_color;
+        };
+        std::array<u8, 0x20> raw;
     };
-    union {
-        BitField<0, 2, TextureFilter> mag_filter;
-        BitField<4, 2, TextureFilter> min_filter;
-        BitField<6, 2, TextureMipmapFilter> mip_filter;
-        BitField<9, 1, u32> cubemap_interface_filtering;
-        BitField<12, 13, u32> mip_lod_bias;
-    };
-    union {
-        BitField<0, 12, u32> min_lod_clamp;
-        BitField<12, 12, u32> max_lod_clamp;
-        BitField<24, 8, u32> srgb_border_color_r;
-    };
-    union {
-        BitField<12, 8, u32> srgb_border_color_g;
-        BitField<20, 8, u32> srgb_border_color_b;
-    };
-    float border_color_r;
-    float border_color_g;
-    float border_color_b;
-    float border_color_a;
+
+    float GetMaxAnisotropy() const {
+        return static_cast<float>(1U << max_anisotropy);
+    }
+
+    float GetMinLod() const {
+        return static_cast<float>(min_lod_clamp) / 256.0f;
+    }
+
+    float GetMaxLod() const {
+        return static_cast<float>(max_lod_clamp) / 256.0f;
+    }
+
+    float GetLodBias() const {
+        // Sign extend the 13-bit value.
+        constexpr u32 mask = 1U << (13 - 1);
+        return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
+    }
+
+    std::array<float, 4> GetBorderColor() const {
+        if (srgb_conversion) {
+            return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
+                    srgb_border_color_b / 255.0f, border_color[3]};
+        }
+        return border_color;
+    }
 };
 static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
 
@@ -317,7 +344,6 @@ struct FullTextureInfo {
     u32 index;
     TICEntry tic;
     TSCEntry tsc;
-    bool enabled;
 };
 
 /// Returns the number of bytes per pixel of the input texture format.
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index f7de3471b..cb82ecf3f 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -11,14 +11,16 @@
 
 namespace VideoCore {
 
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) {
-    return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                             Core::System& system) {
+    return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
 }
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
-    return !Settings::values.resolution_factor
-               ? renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()
-               : Settings::values.resolution_factor;
+    return static_cast<u16>(
+        Settings::values.resolution_factor
+            ? Settings::values.resolution_factor
+            : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
 }
 
 } // namespace VideoCore
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 5b373bcb1..3c583f195 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -6,6 +6,10 @@
 
 #include <memory>
 
+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -20,7 +24,8 @@ class RendererBase;
  * @note The returned renderer instance is simply allocated. Its Init()
  *       function still needs to be called to fully complete its setup.
  */
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                             Core::System& system);
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer);
 
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h
index 39db32dbb..821b345d7 100644
--- a/src/web_service/verify_login.h
+++ b/src/web_service/verify_login.h
@@ -4,8 +4,6 @@
 
 #pragma once
 
-#include <functional>
-#include <future>
 #include <string>
 
 namespace WebService {
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index b7737b615..dc149d2ed 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/web_result.h"
-#include "core/settings.h"
 #include "web_service/web_backend.h"
 
 namespace WebService {
@@ -25,7 +24,7 @@ constexpr u32 TIMEOUT_SECONDS = 30;
 struct Client::Impl {
     Impl(std::string host, std::string username, std::string token)
         : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} {
-        std::lock_guard<std::mutex> lock(jwt_cache.mutex);
+        std::lock_guard lock{jwt_cache.mutex};
         if (this->username == jwt_cache.username && this->token == jwt_cache.token) {
             jwt = jwt_cache.jwt;
         }
@@ -152,7 +151,7 @@ struct Client::Impl {
         if (result.result_code != Common::WebResult::Code::Success) {
             LOG_ERROR(WebService, "UpdateJWT failed");
         } else {
-            std::lock_guard<std::mutex> lock(jwt_cache.mutex);
+            std::lock_guard lock{jwt_cache.mutex};
             jwt_cache.username = username;
             jwt_cache.token = token;
             jwt_cache.jwt = jwt = result.returned_data;
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 4cab599b4..2eb86d6e5 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -31,6 +31,8 @@ add_executable(yuzu
     configuration/configure_general.h
     configuration/configure_graphics.cpp
     configuration/configure_graphics.h
+    configuration/configure_hotkeys.cpp
+    configuration/configure_hotkeys.h
     configuration/configure_input.cpp
     configuration/configure_input.h
     configuration/configure_input_player.cpp
@@ -54,8 +56,6 @@ add_executable(yuzu
     debugger/graphics/graphics_breakpoints.cpp
     debugger/graphics/graphics_breakpoints.h
     debugger/graphics/graphics_breakpoints_p.h
-    debugger/graphics/graphics_surface.cpp
-    debugger/graphics/graphics_surface.h
     debugger/console.cpp
     debugger/console.h
     debugger/profiler.cpp
@@ -78,6 +78,8 @@ add_executable(yuzu
     ui_settings.h
     util/limitable_input_dialog.cpp
     util/limitable_input_dialog.h
+    util/sequence_dialog/sequence_dialog.cpp
+    util/sequence_dialog/sequence_dialog.h
     util/spinbox.cpp
     util/spinbox.h
     util/util.cpp
@@ -95,6 +97,7 @@ set(UIS
     configuration/configure_gamelist.ui
     configuration/configure_general.ui
     configuration/configure_graphics.ui
+    configuration/configure_hotkeys.ui
     configuration/configure_input.ui
     configuration/configure_input_player.ui
     configuration/configure_input_simple.ui
@@ -105,7 +108,6 @@ set(UIS
     configuration/configure_touchscreen_advanced.ui
     configuration/configure_web.ui
     compatdb.ui
-    hotkeys.ui
     loading_screen.ui
     main.ui
 )
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 5c1b65a2c..743b24d76 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -4,6 +4,7 @@
 
 #include <mutex>
 #include <QDialogButtonBox>
+#include <QHeaderView>
 #include <QLabel>
 #include <QLineEdit>
 #include <QScrollArea>
@@ -58,10 +59,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
 
     scroll_area = new QScrollArea;
 
-    buttons = new QDialogButtonBox;
-    buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole);
-    buttons->addButton(tr("OK"), QDialogButtonBox::AcceptRole);
-
+    buttons = new QDialogButtonBox(QDialogButtonBox::Cancel | QDialogButtonBox::Ok);
     connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept);
     connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject);
 
@@ -163,6 +161,6 @@ void QtProfileSelector::SelectProfile(
 
 void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     callback(uuid);
 }
diff --git a/src/yuzu/applets/profile_select.h b/src/yuzu/applets/profile_select.h
index 868573324..1c2922e54 100644
--- a/src/yuzu/applets/profile_select.h
+++ b/src/yuzu/applets/profile_select.h
@@ -7,6 +7,7 @@
 #include <vector>
 #include <QDialog>
 #include <QList>
+#include <QTreeView>
 #include "core/frontend/applets/profile_select.h"
 
 class GMainWindow;
@@ -16,7 +17,6 @@ class QLabel;
 class QScrollArea;
 class QStandardItem;
 class QStandardItemModel;
-class QTreeView;
 class QVBoxLayout;
 
 class QtProfileSelectionDialog final : public QDialog {
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index 8a26fdff1..f3eb29b25 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -75,13 +75,13 @@ QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
         length_label->setText(QStringLiteral("%1/%2").arg(text.size()).arg(parameters.max_length));
     });
 
-    buttons = new QDialogButtonBox;
-    buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole);
-    buttons->addButton(parameters.submit_text.empty()
-                           ? tr("OK")
-                           : QString::fromStdU16String(parameters.submit_text),
-                       QDialogButtonBox::AcceptRole);
-
+    buttons = new QDialogButtonBox(QDialogButtonBox::Cancel);
+    if (parameters.submit_text.empty()) {
+        buttons->addButton(QDialogButtonBox::Ok);
+    } else {
+        buttons->addButton(QString::fromStdU16String(parameters.submit_text),
+                           QDialogButtonBox::AcceptRole);
+    }
     connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept);
     connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject);
     layout->addWidget(header_label);
@@ -141,12 +141,12 @@ void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
 
 void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     text_output(text);
 }
 
 void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     finished_check();
 }
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 6a9138d53..ac80b2fa2 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
     window.nx.endApplet = function() {
         applet_done = true;
     };
+
+    window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
 )";
 
 QString GetNXShimInjectionScript() {
@@ -102,12 +104,12 @@ void QtWebBrowser::OpenPage(std::string_view url, std::function<void()> unpack_r
 
 void QtWebBrowser::MainWindowUnpackRomFS() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     unpack_romfs_callback();
 }
 
 void QtWebBrowser::MainWindowFinishedBrowsing() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     finished_callback();
 }
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index e1825e607..c29f2d2dc 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -27,16 +27,25 @@
 EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
 
 void EmuThread::run() {
-    if (!Settings::values.use_multi_core) {
-        // Single core mode must acquire OpenGL context for entire emulation session
-        render_window->MakeCurrent();
-    }
+    render_window->MakeCurrent();
 
     MicroProfileOnThreadCreate("EmuThread");
 
-    stop_run = false;
+    emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
+
+    Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
+        stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
+            emit LoadProgress(stage, value, total);
+        });
+
+    emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
+
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        // Release OpenGL context for the GPU thread
+        render_window->DoneCurrent();
+    }
 
-    // holds whether the cpu was running during the last iteration,
+    // Holds whether the cpu was running during the last iteration,
     // so that the DebugModeLeft signal can be emitted before the
     // next execution step
     bool was_active = false;
@@ -65,7 +74,7 @@ void EmuThread::run() {
 
             was_active = false;
         } else {
-            std::unique_lock<std::mutex> lock(running_mutex);
+            std::unique_lock lock{running_mutex};
             running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
         }
     }
@@ -184,7 +193,6 @@ GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
     setAttribute(Qt::WA_AcceptTouchEvents);
 
     InputCommon::Init();
-    InputCommon::StartJoystickEventHandler();
     connect(this, &GRenderWindow::FirstFrameDisplayed, static_cast<GMainWindow*>(parent),
             &GMainWindow::OnLoadComplete);
 }
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 288ce1572..9608b959f 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -24,6 +24,10 @@ class GRenderWindow;
 class QSurface;
 class QOpenGLContext;
 
+namespace VideoCore {
+enum class LoadCallbackStage;
+}
+
 class EmuThread : public QThread {
     Q_OBJECT
 
@@ -51,7 +55,7 @@ public:
      * @note This function is thread-safe
      */
     void SetRunning(bool running) {
-        std::unique_lock<std::mutex> lock(running_mutex);
+        std::unique_lock lock{running_mutex};
         this->running = running;
         lock.unlock();
         running_cv.notify_all();
@@ -77,7 +81,7 @@ public:
 private:
     bool exec_step = false;
     bool running = false;
-    std::atomic<bool> stop_run{false};
+    std::atomic_bool stop_run{false};
     std::mutex running_mutex;
     std::condition_variable running_cv;
 
@@ -103,6 +107,8 @@ signals:
     void DebugModeLeft();
 
     void ErrorThrown(Core::System::ResultStatus, std::string);
+
+    void LoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total);
 };
 
 class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 5f0896f84..c8b0a5ec0 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -53,15 +53,15 @@ void CompatDB::Submit() {
     case CompatDBPage::Final:
         back();
         LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
-        Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility",
-                                   compatibility->checkedId());
+        Core::System::GetInstance().TelemetrySession().AddField(
+            Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
 
         button(NextButton)->setEnabled(false);
         button(NextButton)->setText(tr("Submitting"));
         button(QWizard::CancelButton)->setVisible(false);
 
         testcase_watcher.setFuture(QtConcurrent::run(
-            [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
+            [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
         break;
     default:
         LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index ddf4cf552..ca60bc0c9 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
+#include <QKeySequence>
 #include <QSettings>
 #include "common/file_util.h"
 #include "configure_input_simple.h"
@@ -9,7 +11,6 @@
 #include "core/hle/service/hid/controllers/npad.h"
 #include "input_common/main.h"
 #include "yuzu/configuration/config.h"
-#include "yuzu/ui_settings.h"
 
 Config::Config() {
     // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
@@ -17,7 +18,6 @@ Config::Config() {
     FileUtil::CreateFullPath(qt_config_loc);
     qt_config =
         std::make_unique<QSettings>(QString::fromStdString(qt_config_loc), QSettings::IniFormat);
-
     Reload();
 }
 
@@ -205,11 +205,32 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
     Qt::Key_Control, Qt::Key_Shift, Qt::Key_AltGr, Qt::Key_ApplicationRight,
 };
 
+// This shouldn't have anything except static initializers (no functions). So
+// QKeySequence(...).toString() is NOT ALLOWED HERE.
+// This must be in alphabetical order according to action name as it must have the same order as
+// UISettings::values.shortcuts, which is alphabetically ordered.
+const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{
+    {{"Capture Screenshot", "Main Window", {"Ctrl+P", Qt::ApplicationShortcut}},
+     {"Continue/Pause Emulation", "Main Window", {"F4", Qt::WindowShortcut}},
+     {"Decrease Speed Limit", "Main Window", {"-", Qt::ApplicationShortcut}},
+     {"Exit yuzu", "Main Window", {"Ctrl+Q", Qt::WindowShortcut}},
+     {"Exit Fullscreen", "Main Window", {"Esc", Qt::WindowShortcut}},
+     {"Fullscreen", "Main Window", {"F11", Qt::WindowShortcut}},
+     {"Increase Speed Limit", "Main Window", {"+", Qt::ApplicationShortcut}},
+     {"Load Amiibo", "Main Window", {"F2", Qt::ApplicationShortcut}},
+     {"Load File", "Main Window", {"Ctrl+O", Qt::WindowShortcut}},
+     {"Restart Emulation", "Main Window", {"F6", Qt::WindowShortcut}},
+     {"Stop Emulation", "Main Window", {"F5", Qt::WindowShortcut}},
+     {"Toggle Filter Bar", "Main Window", {"Ctrl+F", Qt::WindowShortcut}},
+     {"Toggle Speed Limit", "Main Window", {"Ctrl+Z", Qt::ApplicationShortcut}},
+     {"Toggle Status Bar", "Main Window", {"Ctrl+S", Qt::WindowShortcut}},
+     {"Change Docked Mode", "Main Window", {"F10", Qt::ApplicationShortcut}}}};
+
 void Config::ReadPlayerValues() {
     for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
         auto& player = Settings::values.players[p];
 
-        player.connected = qt_config->value(QString("player_%1_connected").arg(p), false).toBool();
+        player.connected = ReadSetting(QString("player_%1_connected").arg(p), false).toBool();
 
         player.type = static_cast<Settings::ControllerType>(
             qt_config
@@ -269,7 +290,7 @@ void Config::ReadPlayerValues() {
 }
 
 void Config::ReadDebugValues() {
-    Settings::values.debug_pad_enabled = qt_config->value("debug_pad_enabled", false).toBool();
+    Settings::values.debug_pad_enabled = ReadSetting("debug_pad_enabled", false).toBool();
     for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
         std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
         Settings::values.debug_pad_buttons[i] =
@@ -298,7 +319,7 @@ void Config::ReadDebugValues() {
 }
 
 void Config::ReadKeyboardValues() {
-    Settings::values.keyboard_enabled = qt_config->value("keyboard_enabled", false).toBool();
+    Settings::values.keyboard_enabled = ReadSetting("keyboard_enabled", false).toBool();
 
     std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
                    Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -311,7 +332,7 @@ void Config::ReadKeyboardValues() {
 }
 
 void Config::ReadMouseValues() {
-    Settings::values.mouse_enabled = qt_config->value("mouse_enabled", false).toBool();
+    Settings::values.mouse_enabled = ReadSetting("mouse_enabled", false).toBool();
 
     for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
         std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
@@ -327,16 +348,14 @@ void Config::ReadMouseValues() {
 }
 
 void Config::ReadTouchscreenValues() {
-    Settings::values.touchscreen.enabled = qt_config->value("touchscreen_enabled", true).toBool();
+    Settings::values.touchscreen.enabled = ReadSetting("touchscreen_enabled", true).toBool();
     Settings::values.touchscreen.device =
-        qt_config->value("touchscreen_device", "engine:emu_window").toString().toStdString();
-
-    Settings::values.touchscreen.finger = qt_config->value("touchscreen_finger", 0).toUInt();
-    Settings::values.touchscreen.rotation_angle = qt_config->value("touchscreen_angle", 0).toUInt();
-    Settings::values.touchscreen.diameter_x =
-        qt_config->value("touchscreen_diameter_x", 15).toUInt();
-    Settings::values.touchscreen.diameter_y =
-        qt_config->value("touchscreen_diameter_y", 15).toUInt();
+        ReadSetting("touchscreen_device", "engine:emu_window").toString().toStdString();
+
+    Settings::values.touchscreen.finger = ReadSetting("touchscreen_finger", 0).toUInt();
+    Settings::values.touchscreen.rotation_angle = ReadSetting("touchscreen_angle", 0).toUInt();
+    Settings::values.touchscreen.diameter_x = ReadSetting("touchscreen_diameter_x", 15).toUInt();
+    Settings::values.touchscreen.diameter_y = ReadSetting("touchscreen_diameter_y", 15).toUInt();
     qt_config->endGroup();
 }
 
@@ -357,38 +376,41 @@ void Config::ReadValues() {
     ReadTouchscreenValues();
 
     Settings::values.motion_device =
-        qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
+        ReadSetting("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
             .toString()
             .toStdString();
 
     qt_config->beginGroup("Core");
-    Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
-    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
+    Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("Renderer");
-    Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
-    Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
-    Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
+    Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat();
+    Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
+    Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
+    Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool();
     Settings::values.use_accurate_gpu_emulation =
-        qt_config->value("use_accurate_gpu_emulation", false).toBool();
+        ReadSetting("use_accurate_gpu_emulation", false).toBool();
+    Settings::values.use_asynchronous_gpu_emulation =
+        ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
 
-    Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
-    Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
-    Settings::values.bg_blue = qt_config->value("bg_blue", 0.0).toFloat();
+    Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
+    Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
+    Settings::values.bg_blue = ReadSetting("bg_blue", 0.0).toFloat();
     qt_config->endGroup();
 
     qt_config->beginGroup("Audio");
-    Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
+    Settings::values.sink_id = ReadSetting("output_engine", "auto").toString().toStdString();
     Settings::values.enable_audio_stretching =
-        qt_config->value("enable_audio_stretching", true).toBool();
+        ReadSetting("enable_audio_stretching", true).toBool();
     Settings::values.audio_device_id =
-        qt_config->value("output_device", "auto").toString().toStdString();
-    Settings::values.volume = qt_config->value("volume", 1).toFloat();
+        ReadSetting("output_device", "auto").toString().toStdString();
+    Settings::values.volume = ReadSetting("volume", 1).toFloat();
     qt_config->endGroup();
 
     qt_config->beginGroup("Data Storage");
-    Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool();
+    Settings::values.use_virtual_sd = ReadSetting("use_virtual_sd", true).toBool();
     FileUtil::GetUserPath(
         FileUtil::UserPath::NANDDir,
         qt_config
@@ -406,30 +428,29 @@ void Config::ReadValues() {
     qt_config->endGroup();
 
     qt_config->beginGroup("Core");
-    Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
-    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
+    Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("System");
-    Settings::values.use_docked_mode = qt_config->value("use_docked_mode", false).toBool();
-    Settings::values.enable_nfc = qt_config->value("enable_nfc", true).toBool();
+    Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
 
-    Settings::values.current_user = std::clamp<int>(qt_config->value("current_user", 0).toInt(), 0,
-                                                    Service::Account::MAX_USERS - 1);
+    Settings::values.current_user =
+        std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
 
-    Settings::values.language_index = qt_config->value("language_index", 1).toInt();
+    Settings::values.language_index = ReadSetting("language_index", 1).toInt();
 
-    const auto rng_seed_enabled = qt_config->value("rng_seed_enabled", false).toBool();
+    const auto rng_seed_enabled = ReadSetting("rng_seed_enabled", false).toBool();
     if (rng_seed_enabled) {
-        Settings::values.rng_seed = qt_config->value("rng_seed", 0).toULongLong();
+        Settings::values.rng_seed = ReadSetting("rng_seed", 0).toULongLong();
     } else {
         Settings::values.rng_seed = std::nullopt;
     }
 
-    const auto custom_rtc_enabled = qt_config->value("custom_rtc_enabled", false).toBool();
+    const auto custom_rtc_enabled = ReadSetting("custom_rtc_enabled", false).toBool();
     if (custom_rtc_enabled) {
         Settings::values.custom_rtc =
-            std::chrono::seconds(qt_config->value("custom_rtc", 0).toULongLong());
+            std::chrono::seconds(ReadSetting("custom_rtc", 0).toULongLong());
     } else {
         Settings::values.custom_rtc = std::nullopt;
     }
@@ -437,35 +458,35 @@ void Config::ReadValues() {
     qt_config->endGroup();
 
     qt_config->beginGroup("Miscellaneous");
-    Settings::values.log_filter = qt_config->value("log_filter", "*:Info").toString().toStdString();
-    Settings::values.use_dev_keys = qt_config->value("use_dev_keys", false).toBool();
+    Settings::values.log_filter = ReadSetting("log_filter", "*:Info").toString().toStdString();
+    Settings::values.use_dev_keys = ReadSetting("use_dev_keys", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("Debugging");
-    Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool();
-    Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt();
-    Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString();
-    Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool();
-    Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool();
+    Settings::values.use_gdbstub = ReadSetting("use_gdbstub", false).toBool();
+    Settings::values.gdbstub_port = ReadSetting("gdbstub_port", 24689).toInt();
+    Settings::values.program_args = ReadSetting("program_args", "").toString().toStdString();
+    Settings::values.dump_exefs = ReadSetting("dump_exefs", false).toBool();
+    Settings::values.dump_nso = ReadSetting("dump_nso", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("WebService");
-    Settings::values.enable_telemetry = qt_config->value("enable_telemetry", true).toBool();
+    Settings::values.enable_telemetry = ReadSetting("enable_telemetry", true).toBool();
     Settings::values.web_api_url =
-        qt_config->value("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
-    Settings::values.yuzu_username = qt_config->value("yuzu_username").toString().toStdString();
-    Settings::values.yuzu_token = qt_config->value("yuzu_token").toString().toStdString();
+        ReadSetting("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
+    Settings::values.yuzu_username = ReadSetting("yuzu_username").toString().toStdString();
+    Settings::values.yuzu_token = ReadSetting("yuzu_token").toString().toStdString();
     qt_config->endGroup();
 
     const auto size = qt_config->beginReadArray("DisabledAddOns");
     for (int i = 0; i < size; ++i) {
         qt_config->setArrayIndex(i);
-        const auto title_id = qt_config->value("title_id", 0).toULongLong();
+        const auto title_id = ReadSetting("title_id", 0).toULongLong();
         std::vector<std::string> out;
         const auto d_size = qt_config->beginReadArray("disabled");
         for (int j = 0; j < d_size; ++j) {
             qt_config->setArrayIndex(j);
-            out.push_back(qt_config->value("d", "").toString().toStdString());
+            out.push_back(ReadSetting("d", "").toString().toStdString());
         }
         qt_config->endArray();
         Settings::values.disabled_addons.insert_or_assign(title_id, out);
@@ -473,72 +494,64 @@ void Config::ReadValues() {
     qt_config->endArray();
 
     qt_config->beginGroup("UI");
-    UISettings::values.theme = qt_config->value("theme", UISettings::themes[0].second).toString();
+    UISettings::values.theme = ReadSetting("theme", UISettings::themes[0].second).toString();
     UISettings::values.enable_discord_presence =
-        qt_config->value("enable_discord_presence", true).toBool();
+        ReadSetting("enable_discord_presence", true).toBool();
     UISettings::values.screenshot_resolution_factor =
-        static_cast<u16>(qt_config->value("screenshot_resolution_factor", 0).toUInt());
-    UISettings::values.select_user_on_boot =
-        qt_config->value("select_user_on_boot", false).toBool();
+        static_cast<u16>(ReadSetting("screenshot_resolution_factor", 0).toUInt());
+    UISettings::values.select_user_on_boot = ReadSetting("select_user_on_boot", false).toBool();
 
     qt_config->beginGroup("UIGameList");
-    UISettings::values.show_unknown = qt_config->value("show_unknown", true).toBool();
-    UISettings::values.show_add_ons = qt_config->value("show_add_ons", true).toBool();
-    UISettings::values.icon_size = qt_config->value("icon_size", 64).toUInt();
-    UISettings::values.row_1_text_id = qt_config->value("row_1_text_id", 3).toUInt();
-    UISettings::values.row_2_text_id = qt_config->value("row_2_text_id", 2).toUInt();
+    UISettings::values.show_unknown = ReadSetting("show_unknown", true).toBool();
+    UISettings::values.show_add_ons = ReadSetting("show_add_ons", true).toBool();
+    UISettings::values.icon_size = ReadSetting("icon_size", 64).toUInt();
+    UISettings::values.row_1_text_id = ReadSetting("row_1_text_id", 3).toUInt();
+    UISettings::values.row_2_text_id = ReadSetting("row_2_text_id", 2).toUInt();
     qt_config->endGroup();
 
     qt_config->beginGroup("UILayout");
-    UISettings::values.geometry = qt_config->value("geometry").toByteArray();
-    UISettings::values.state = qt_config->value("state").toByteArray();
-    UISettings::values.renderwindow_geometry =
-        qt_config->value("geometryRenderWindow").toByteArray();
-    UISettings::values.gamelist_header_state =
-        qt_config->value("gameListHeaderState").toByteArray();
+    UISettings::values.geometry = ReadSetting("geometry").toByteArray();
+    UISettings::values.state = ReadSetting("state").toByteArray();
+    UISettings::values.renderwindow_geometry = ReadSetting("geometryRenderWindow").toByteArray();
+    UISettings::values.gamelist_header_state = ReadSetting("gameListHeaderState").toByteArray();
     UISettings::values.microprofile_geometry =
-        qt_config->value("microProfileDialogGeometry").toByteArray();
+        ReadSetting("microProfileDialogGeometry").toByteArray();
     UISettings::values.microprofile_visible =
-        qt_config->value("microProfileDialogVisible", false).toBool();
+        ReadSetting("microProfileDialogVisible", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("Paths");
-    UISettings::values.roms_path = qt_config->value("romsPath").toString();
-    UISettings::values.symbols_path = qt_config->value("symbolsPath").toString();
-    UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString();
-    UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool();
-    UISettings::values.recent_files = qt_config->value("recentFiles").toStringList();
+    UISettings::values.roms_path = ReadSetting("romsPath").toString();
+    UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
+    UISettings::values.game_directory_path = ReadSetting("gameListRootDir", ".").toString();
+    UISettings::values.game_directory_deepscan = ReadSetting("gameListDeepScan", false).toBool();
+    UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
     qt_config->endGroup();
 
     qt_config->beginGroup("Shortcuts");
-    QStringList groups = qt_config->childGroups();
-    for (auto group : groups) {
+    for (auto [name, group, shortcut] : default_hotkeys) {
+        auto [keyseq, context] = shortcut;
         qt_config->beginGroup(group);
-
-        QStringList hotkeys = qt_config->childGroups();
-        for (auto hotkey : hotkeys) {
-            qt_config->beginGroup(hotkey);
-            UISettings::values.shortcuts.emplace_back(UISettings::Shortcut(
-                group + "/" + hotkey,
-                UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(),
-                                               qt_config->value("Context").toInt())));
-            qt_config->endGroup();
-        }
-
+        qt_config->beginGroup(name);
+        UISettings::values.shortcuts.push_back(
+            {name,
+             group,
+             {ReadSetting("KeySeq", keyseq).toString(), ReadSetting("Context", context).toInt()}});
+        qt_config->endGroup();
         qt_config->endGroup();
     }
     qt_config->endGroup();
 
-    UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool();
-    UISettings::values.fullscreen = qt_config->value("fullscreen", false).toBool();
-    UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool();
-    UISettings::values.show_filter_bar = qt_config->value("showFilterBar", true).toBool();
-    UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool();
-    UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool();
-    UISettings::values.first_start = qt_config->value("firstStart", true).toBool();
-    UISettings::values.callout_flags = qt_config->value("calloutFlags", 0).toUInt();
-    UISettings::values.show_console = qt_config->value("showConsole", false).toBool();
-    UISettings::values.profile_index = qt_config->value("profileIndex", 0).toUInt();
+    UISettings::values.single_window_mode = ReadSetting("singleWindowMode", true).toBool();
+    UISettings::values.fullscreen = ReadSetting("fullscreen", false).toBool();
+    UISettings::values.display_titlebar = ReadSetting("displayTitleBars", true).toBool();
+    UISettings::values.show_filter_bar = ReadSetting("showFilterBar", true).toBool();
+    UISettings::values.show_status_bar = ReadSetting("showStatusBar", true).toBool();
+    UISettings::values.confirm_before_closing = ReadSetting("confirmClose", true).toBool();
+    UISettings::values.first_start = ReadSetting("firstStart", true).toBool();
+    UISettings::values.callout_flags = ReadSetting("calloutFlags", 0).toUInt();
+    UISettings::values.show_console = ReadSetting("showConsole", false).toBool();
+    UISettings::values.profile_index = ReadSetting("profileIndex", 0).toUInt();
 
     ApplyDefaultProfileIfInputInvalid();
 
@@ -549,62 +562,79 @@ void Config::SavePlayerValues() {
     for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
         const auto& player = Settings::values.players[p];
 
-        qt_config->setValue(QString("player_%1_connected").arg(p), player.connected);
-        qt_config->setValue(QString("player_%1_type").arg(p), static_cast<u8>(player.type));
+        WriteSetting(QString("player_%1_connected").arg(p), player.connected, false);
+        WriteSetting(QString("player_%1_type").arg(p), static_cast<u8>(player.type),
+                     static_cast<u8>(Settings::ControllerType::DualJoycon));
 
-        qt_config->setValue(QString("player_%1_body_color_left").arg(p), player.body_color_left);
-        qt_config->setValue(QString("player_%1_body_color_right").arg(p), player.body_color_right);
-        qt_config->setValue(QString("player_%1_button_color_left").arg(p),
-                            player.button_color_left);
-        qt_config->setValue(QString("player_%1_button_color_right").arg(p),
-                            player.button_color_right);
+        WriteSetting(QString("player_%1_body_color_left").arg(p), player.body_color_left,
+                     Settings::JOYCON_BODY_NEON_BLUE);
+        WriteSetting(QString("player_%1_body_color_right").arg(p), player.body_color_right,
+                     Settings::JOYCON_BODY_NEON_RED);
+        WriteSetting(QString("player_%1_button_color_left").arg(p), player.button_color_left,
+                     Settings::JOYCON_BUTTONS_NEON_BLUE);
+        WriteSetting(QString("player_%1_button_color_right").arg(p), player.button_color_right,
+                     Settings::JOYCON_BUTTONS_NEON_RED);
 
         for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
-            qt_config->setValue(QString("player_%1_").arg(p) +
-                                    QString::fromStdString(Settings::NativeButton::mapping[i]),
-                                QString::fromStdString(player.buttons[i]));
+            std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
+            WriteSetting(QString("player_%1_").arg(p) +
+                             QString::fromStdString(Settings::NativeButton::mapping[i]),
+                         QString::fromStdString(player.buttons[i]),
+                         QString::fromStdString(default_param));
         }
         for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
-            qt_config->setValue(QString("player_%1_").arg(p) +
-                                    QString::fromStdString(Settings::NativeAnalog::mapping[i]),
-                                QString::fromStdString(player.analogs[i]));
+            std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
+                default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
+                default_analogs[i][3], default_analogs[i][4], 0.5f);
+            WriteSetting(QString("player_%1_").arg(p) +
+                             QString::fromStdString(Settings::NativeAnalog::mapping[i]),
+                         QString::fromStdString(player.analogs[i]),
+                         QString::fromStdString(default_param));
         }
     }
 }
 
 void Config::SaveDebugValues() {
-    qt_config->setValue("debug_pad_enabled", Settings::values.debug_pad_enabled);
+    WriteSetting("debug_pad_enabled", Settings::values.debug_pad_enabled, false);
     for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
-        qt_config->setValue(QString("debug_pad_") +
-                                QString::fromStdString(Settings::NativeButton::mapping[i]),
-                            QString::fromStdString(Settings::values.debug_pad_buttons[i]));
+        std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
+        WriteSetting(QString("debug_pad_") +
+                         QString::fromStdString(Settings::NativeButton::mapping[i]),
+                     QString::fromStdString(Settings::values.debug_pad_buttons[i]),
+                     QString::fromStdString(default_param));
     }
     for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
-        qt_config->setValue(QString("debug_pad_") +
-                                QString::fromStdString(Settings::NativeAnalog::mapping[i]),
-                            QString::fromStdString(Settings::values.debug_pad_analogs[i]));
+        std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
+            default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
+            default_analogs[i][3], default_analogs[i][4], 0.5f);
+        WriteSetting(QString("debug_pad_") +
+                         QString::fromStdString(Settings::NativeAnalog::mapping[i]),
+                     QString::fromStdString(Settings::values.debug_pad_analogs[i]),
+                     QString::fromStdString(default_param));
     }
 }
 
 void Config::SaveMouseValues() {
-    qt_config->setValue("mouse_enabled", Settings::values.mouse_enabled);
+    WriteSetting("mouse_enabled", Settings::values.mouse_enabled, false);
 
     for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
-        qt_config->setValue(QString("mouse_") +
-                                QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
-                            QString::fromStdString(Settings::values.mouse_buttons[i]));
+        std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
+        WriteSetting(QString("mouse_") +
+                         QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
+                     QString::fromStdString(Settings::values.mouse_buttons[i]),
+                     QString::fromStdString(default_param));
     }
 }
 
 void Config::SaveTouchscreenValues() {
-    qt_config->setValue("touchscreen_enabled", Settings::values.touchscreen.enabled);
-    qt_config->setValue("touchscreen_device",
-                        QString::fromStdString(Settings::values.touchscreen.device));
-
-    qt_config->setValue("touchscreen_finger", Settings::values.touchscreen.finger);
-    qt_config->setValue("touchscreen_angle", Settings::values.touchscreen.rotation_angle);
-    qt_config->setValue("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x);
-    qt_config->setValue("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y);
+    WriteSetting("touchscreen_enabled", Settings::values.touchscreen.enabled, true);
+    WriteSetting("touchscreen_device", QString::fromStdString(Settings::values.touchscreen.device),
+                 "engine:emu_window");
+
+    WriteSetting("touchscreen_finger", Settings::values.touchscreen.finger, 0);
+    WriteSetting("touchscreen_angle", Settings::values.touchscreen.rotation_angle, 0);
+    WriteSetting("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x, 15);
+    WriteSetting("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y, 15);
 }
 
 void Config::SaveValues() {
@@ -615,88 +645,95 @@ void Config::SaveValues() {
     SaveMouseValues();
     SaveTouchscreenValues();
 
-    qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device));
-    qt_config->setValue("keyboard_enabled", Settings::values.keyboard_enabled);
+    WriteSetting("motion_device", QString::fromStdString(Settings::values.motion_device),
+                 "engine:motion_emu,update_period:100,sensitivity:0.01");
+    WriteSetting("keyboard_enabled", Settings::values.keyboard_enabled, false);
 
     qt_config->endGroup();
 
     qt_config->beginGroup("Core");
-    qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit);
-    qt_config->setValue("use_multi_core", Settings::values.use_multi_core);
+    WriteSetting("use_cpu_jit", Settings::values.use_cpu_jit, true);
+    WriteSetting("use_multi_core", Settings::values.use_multi_core, false);
     qt_config->endGroup();
 
     qt_config->beginGroup("Renderer");
-    qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
-    qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
-    qt_config->setValue("frame_limit", Settings::values.frame_limit);
-    qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
+    WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0);
+    WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
+    WriteSetting("frame_limit", Settings::values.frame_limit, 100);
+    WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true);
+    WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
+    WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
+                 false);
 
     // Cast to double because Qt's written float values are not human-readable
-    qt_config->setValue("bg_red", (double)Settings::values.bg_red);
-    qt_config->setValue("bg_green", (double)Settings::values.bg_green);
-    qt_config->setValue("bg_blue", (double)Settings::values.bg_blue);
+    WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
+    WriteSetting("bg_green", (double)Settings::values.bg_green, 0.0);
+    WriteSetting("bg_blue", (double)Settings::values.bg_blue, 0.0);
     qt_config->endGroup();
 
     qt_config->beginGroup("Audio");
-    qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
-    qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
-    qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
-    qt_config->setValue("volume", Settings::values.volume);
+    WriteSetting("output_engine", QString::fromStdString(Settings::values.sink_id), "auto");
+    WriteSetting("enable_audio_stretching", Settings::values.enable_audio_stretching, true);
+    WriteSetting("output_device", QString::fromStdString(Settings::values.audio_device_id), "auto");
+    WriteSetting("volume", Settings::values.volume, 1.0f);
     qt_config->endGroup();
 
     qt_config->beginGroup("Data Storage");
-    qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd);
-    qt_config->setValue("nand_directory",
-                        QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
-    qt_config->setValue("sdmc_directory",
-                        QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
+    WriteSetting("use_virtual_sd", Settings::values.use_virtual_sd, true);
+    WriteSetting("nand_directory",
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
+    WriteSetting("sdmc_directory",
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
     qt_config->endGroup();
 
     qt_config->beginGroup("System");
-    qt_config->setValue("use_docked_mode", Settings::values.use_docked_mode);
-    qt_config->setValue("enable_nfc", Settings::values.enable_nfc);
-    qt_config->setValue("current_user", Settings::values.current_user);
-    qt_config->setValue("language_index", Settings::values.language_index);
+    WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
+    WriteSetting("current_user", Settings::values.current_user, 0);
+    WriteSetting("language_index", Settings::values.language_index, 1);
 
-    qt_config->setValue("rng_seed_enabled", Settings::values.rng_seed.has_value());
-    qt_config->setValue("rng_seed", Settings::values.rng_seed.value_or(0));
+    WriteSetting("rng_seed_enabled", Settings::values.rng_seed.has_value(), false);
+    WriteSetting("rng_seed", Settings::values.rng_seed.value_or(0), 0);
 
-    qt_config->setValue("custom_rtc_enabled", Settings::values.custom_rtc.has_value());
-    qt_config->setValue("custom_rtc",
-                        QVariant::fromValue<long long>(
-                            Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()));
+    WriteSetting("custom_rtc_enabled", Settings::values.custom_rtc.has_value(), false);
+    WriteSetting("custom_rtc",
+                 QVariant::fromValue<long long>(
+                     Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
+                 0);
 
     qt_config->endGroup();
 
     qt_config->beginGroup("Miscellaneous");
-    qt_config->setValue("log_filter", QString::fromStdString(Settings::values.log_filter));
-    qt_config->setValue("use_dev_keys", Settings::values.use_dev_keys);
+    WriteSetting("log_filter", QString::fromStdString(Settings::values.log_filter), "*:Info");
+    WriteSetting("use_dev_keys", Settings::values.use_dev_keys, false);
     qt_config->endGroup();
 
     qt_config->beginGroup("Debugging");
-    qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub);
-    qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port);
-    qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args));
-    qt_config->setValue("dump_exefs", Settings::values.dump_exefs);
-    qt_config->setValue("dump_nso", Settings::values.dump_nso);
+    WriteSetting("use_gdbstub", Settings::values.use_gdbstub, false);
+    WriteSetting("gdbstub_port", Settings::values.gdbstub_port, 24689);
+    WriteSetting("program_args", QString::fromStdString(Settings::values.program_args), "");
+    WriteSetting("dump_exefs", Settings::values.dump_exefs, false);
+    WriteSetting("dump_nso", Settings::values.dump_nso, false);
     qt_config->endGroup();
 
     qt_config->beginGroup("WebService");
-    qt_config->setValue("enable_telemetry", Settings::values.enable_telemetry);
-    qt_config->setValue("web_api_url", QString::fromStdString(Settings::values.web_api_url));
-    qt_config->setValue("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
-    qt_config->setValue("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
+    WriteSetting("enable_telemetry", Settings::values.enable_telemetry, true);
+    WriteSetting("web_api_url", QString::fromStdString(Settings::values.web_api_url),
+                 "https://api.yuzu-emu.org");
+    WriteSetting("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
+    WriteSetting("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
     qt_config->endGroup();
 
     qt_config->beginWriteArray("DisabledAddOns");
     int i = 0;
     for (const auto& elem : Settings::values.disabled_addons) {
         qt_config->setArrayIndex(i);
-        qt_config->setValue("title_id", QVariant::fromValue<u64>(elem.first));
+        WriteSetting("title_id", QVariant::fromValue<u64>(elem.first), 0);
         qt_config->beginWriteArray("disabled");
         for (std::size_t j = 0; j < elem.second.size(); ++j) {
             qt_config->setArrayIndex(static_cast<int>(j));
-            qt_config->setValue("d", QString::fromStdString(elem.second[j]));
+            WriteSetting("d", QString::fromStdString(elem.second[j]), "");
         }
         qt_config->endArray();
         ++i;
@@ -704,60 +741,93 @@ void Config::SaveValues() {
     qt_config->endArray();
 
     qt_config->beginGroup("UI");
-    qt_config->setValue("theme", UISettings::values.theme);
-    qt_config->setValue("enable_discord_presence", UISettings::values.enable_discord_presence);
-    qt_config->setValue("screenshot_resolution_factor",
-                        UISettings::values.screenshot_resolution_factor);
-    qt_config->setValue("select_user_on_boot", UISettings::values.select_user_on_boot);
+    WriteSetting("theme", UISettings::values.theme, UISettings::themes[0].second);
+    WriteSetting("enable_discord_presence", UISettings::values.enable_discord_presence, true);
+    WriteSetting("screenshot_resolution_factor", UISettings::values.screenshot_resolution_factor,
+                 0);
+    WriteSetting("select_user_on_boot", UISettings::values.select_user_on_boot, false);
 
     qt_config->beginGroup("UIGameList");
-    qt_config->setValue("show_unknown", UISettings::values.show_unknown);
-    qt_config->setValue("show_add_ons", UISettings::values.show_add_ons);
-    qt_config->setValue("icon_size", UISettings::values.icon_size);
-    qt_config->setValue("row_1_text_id", UISettings::values.row_1_text_id);
-    qt_config->setValue("row_2_text_id", UISettings::values.row_2_text_id);
+    WriteSetting("show_unknown", UISettings::values.show_unknown, true);
+    WriteSetting("show_add_ons", UISettings::values.show_add_ons, true);
+    WriteSetting("icon_size", UISettings::values.icon_size, 64);
+    WriteSetting("row_1_text_id", UISettings::values.row_1_text_id, 3);
+    WriteSetting("row_2_text_id", UISettings::values.row_2_text_id, 2);
     qt_config->endGroup();
 
     qt_config->beginGroup("UILayout");
-    qt_config->setValue("geometry", UISettings::values.geometry);
-    qt_config->setValue("state", UISettings::values.state);
-    qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry);
-    qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state);
-    qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
-    qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible);
+    WriteSetting("geometry", UISettings::values.geometry);
+    WriteSetting("state", UISettings::values.state);
+    WriteSetting("geometryRenderWindow", UISettings::values.renderwindow_geometry);
+    WriteSetting("gameListHeaderState", UISettings::values.gamelist_header_state);
+    WriteSetting("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
+    WriteSetting("microProfileDialogVisible", UISettings::values.microprofile_visible, false);
     qt_config->endGroup();
 
     qt_config->beginGroup("Paths");
-    qt_config->setValue("romsPath", UISettings::values.roms_path);
-    qt_config->setValue("symbolsPath", UISettings::values.symbols_path);
-    qt_config->setValue("screenshotPath", UISettings::values.screenshot_path);
-    qt_config->setValue("gameListRootDir", UISettings::values.gamedir);
-    qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan);
-    qt_config->setValue("recentFiles", UISettings::values.recent_files);
+    WriteSetting("romsPath", UISettings::values.roms_path);
+    WriteSetting("symbolsPath", UISettings::values.symbols_path);
+    WriteSetting("screenshotPath", UISettings::values.screenshot_path);
+    WriteSetting("gameListRootDir", UISettings::values.game_directory_path, ".");
+    WriteSetting("gameListDeepScan", UISettings::values.game_directory_deepscan, false);
+    WriteSetting("recentFiles", UISettings::values.recent_files);
     qt_config->endGroup();
 
     qt_config->beginGroup("Shortcuts");
-    for (auto shortcut : UISettings::values.shortcuts) {
-        qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first);
-        qt_config->setValue(shortcut.first + "/Context", shortcut.second.second);
+    // UISettings::values.shortcuts and default_hotkeys have the same length.
+    // The loop below also relies on both being in the same order.
+    for (std::size_t i = 0; i < default_hotkeys.size(); i++) {
+        auto [name, group, shortcut] = UISettings::values.shortcuts[i];
+        qt_config->beginGroup(group);
+        qt_config->beginGroup(name);
+        WriteSetting("KeySeq", shortcut.first, default_hotkeys[i].shortcut.first);
+        WriteSetting("Context", shortcut.second, default_hotkeys[i].shortcut.second);
+        qt_config->endGroup();
+        qt_config->endGroup();
     }
     qt_config->endGroup();
 
-    qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode);
-    qt_config->setValue("fullscreen", UISettings::values.fullscreen);
-    qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar);
-    qt_config->setValue("showFilterBar", UISettings::values.show_filter_bar);
-    qt_config->setValue("showStatusBar", UISettings::values.show_status_bar);
-    qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing);
-    qt_config->setValue("firstStart", UISettings::values.first_start);
-    qt_config->setValue("calloutFlags", UISettings::values.callout_flags);
-    qt_config->setValue("showConsole", UISettings::values.show_console);
-    qt_config->setValue("profileIndex", UISettings::values.profile_index);
+    WriteSetting("singleWindowMode", UISettings::values.single_window_mode, true);
+    WriteSetting("fullscreen", UISettings::values.fullscreen, false);
+    WriteSetting("displayTitleBars", UISettings::values.display_titlebar, true);
+    WriteSetting("showFilterBar", UISettings::values.show_filter_bar, true);
+    WriteSetting("showStatusBar", UISettings::values.show_status_bar, true);
+    WriteSetting("confirmClose", UISettings::values.confirm_before_closing, true);
+    WriteSetting("firstStart", UISettings::values.first_start, true);
+    WriteSetting("calloutFlags", UISettings::values.callout_flags, 0);
+    WriteSetting("showConsole", UISettings::values.show_console, false);
+    WriteSetting("profileIndex", UISettings::values.profile_index, 0);
     qt_config->endGroup();
 }
 
+// Reads `name` from qt_config without any default handling.
+QVariant Config::ReadSetting(const QString& name) const {
+    return qt_config->value(name);
+}
+
+// Reads `name`, but yields `default_value` whenever the companion "<name>/default" flag is
+// set, so settings that were saved while equal to their default follow later default changes.
+QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) const {
+    const bool use_default = qt_config->value(name + "/default", false).toBool();
+    return use_default ? default_value : qt_config->value(name, default_value);
+}
+
+// Stores `value` under `name` as-is.
+void Config::WriteSetting(const QString& name, const QVariant& value) {
+    qt_config->setValue(name, value);
+}
+
+// Stores `value` under `name` and records in "<name>/default" whether it equals the default.
+void Config::WriteSetting(const QString& name, const QVariant& value,
+                          const QVariant& default_value) {
+    qt_config->setValue(name + "/default", value == default_value);
+    qt_config->setValue(name, value);
+}
+
 void Config::Reload() {
     ReadValues();
+    // To apply default value changes
+    SaveValues();
     Settings::Apply();
 }
 
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index e73ad19bb..221d2364c 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -9,6 +9,7 @@
 #include <string>
 #include <QVariant>
 #include "core/settings.h"
+#include "yuzu/ui_settings.h"
 
 class QSettings;
 
@@ -42,6 +43,13 @@ private:
     void SaveMouseValues();
     void SaveTouchscreenValues();
 
+    QVariant ReadSetting(const QString& name) const;
+    QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
+    void WriteSetting(const QString& name, const QVariant& value);
+    void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
+
+    static const std::array<UISettings::Shortcut, 15> default_hotkeys;
+
     std::unique_ptr<QSettings> qt_config;
     std::string qt_config_loc;
 };
diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui
index 3f03f0b77..267717bc9 100644
--- a/src/yuzu/configuration/configure.ui
+++ b/src/yuzu/configuration/configure.ui
@@ -7,9 +7,15 @@
     <x>0</x>
     <y>0</y>
     <width>382</width>
-    <height>241</height>
+    <height>650</height>
    </rect>
   </property>
+  <property name="minimumSize">
+   <size>
+    <width>0</width>
+    <height>650</height>
+   </size>
+  </property>
   <property name="windowTitle">
    <string>yuzu Configuration</string>
   </property>
@@ -62,6 +68,11 @@
          <string>Input</string>
         </attribute>
        </widget>
+       <widget class="ConfigureHotkeys" name="hotkeysTab">
+        <attribute name="title">
+         <string>Hotkeys</string>
+        </attribute>
+       </widget>
        <widget class="ConfigureGraphics" name="graphicsTab">
         <attribute name="title">
          <string>Graphics</string>
@@ -150,6 +161,12 @@
    <header>configuration/configure_input_simple.h</header>
    <container>1</container>
   </customwidget>
+  <customwidget>
+   <class>ConfigureHotkeys</class>
+   <extends>QWidget</extends>
+   <header>configuration/configure_hotkeys.h</header>
+   <container>1</container>
+  </customwidget>
  </customwidgets>
  <resources/>
  <connections>
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index aa7de7b54..550cf9dca 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -7,7 +7,6 @@
 #include "common/file_util.h"
 #include "common/logging/backend.h"
 #include "common/logging/filter.h"
-#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "ui_configure_debug.h"
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index d802443d0..51bd1f121 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -8,20 +8,22 @@
 #include "ui_configure.h"
 #include "yuzu/configuration/config.h"
 #include "yuzu/configuration/configure_dialog.h"
+#include "yuzu/configuration/configure_input_player.h"
 #include "yuzu/hotkeys.h"
 
-ConfigureDialog::ConfigureDialog(QWidget* parent, const HotkeyRegistry& registry)
-    : QDialog(parent), ui(new Ui::ConfigureDialog) {
+ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
+    : QDialog(parent), registry(registry), ui(new Ui::ConfigureDialog) {
     ui->setupUi(this);
-    ui->generalTab->PopulateHotkeyList(registry);
+    ui->hotkeysTab->Populate(registry);
     this->setConfiguration();
     this->PopulateSelectionList();
     connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
             &ConfigureDialog::UpdateVisibleTabs);
-
     adjustSize();
-
     ui->selectorList->setCurrentRow(0);
+
+    // Synchronise lists upon initialisation
+    ui->hotkeysTab->EmitHotkeysChanged();
 }
 
 ConfigureDialog::~ConfigureDialog() = default;
@@ -34,11 +36,13 @@ void ConfigureDialog::applyConfiguration() {
     ui->systemTab->applyConfiguration();
     ui->profileManagerTab->applyConfiguration();
     ui->inputTab->applyConfiguration();
+    ui->hotkeysTab->applyConfiguration(registry);
     ui->graphicsTab->applyConfiguration();
     ui->audioTab->applyConfiguration();
     ui->debugTab->applyConfiguration();
     ui->webTab->applyConfiguration();
     Settings::Apply();
+    Settings::LogSettings();
 }
 
 void ConfigureDialog::PopulateSelectionList() {
@@ -46,7 +50,7 @@ void ConfigureDialog::PopulateSelectionList() {
         {{tr("General"), {tr("General"), tr("Web"), tr("Debug"), tr("Game List")}},
          {tr("System"), {tr("System"), tr("Profiles"), tr("Audio")}},
          {tr("Graphics"), {tr("Graphics")}},
-         {tr("Controls"), {tr("Input")}}}};
+         {tr("Controls"), {tr("Input"), tr("Hotkeys")}}}};
 
     for (const auto& entry : items) {
         auto* const item = new QListWidgetItem(entry.first);
@@ -65,6 +69,7 @@ void ConfigureDialog::UpdateVisibleTabs() {
                                                  {tr("System"), ui->systemTab},
                                                  {tr("Profiles"), ui->profileManagerTab},
                                                  {tr("Input"), ui->inputTab},
+                                                 {tr("Hotkeys"), ui->hotkeysTab},
                                                  {tr("Graphics"), ui->graphicsTab},
                                                  {tr("Audio"), ui->audioTab},
                                                  {tr("Debug"), ui->debugTab},
diff --git a/src/yuzu/configuration/configure_dialog.h b/src/yuzu/configuration/configure_dialog.h
index 243d9fa09..2363ba584 100644
--- a/src/yuzu/configuration/configure_dialog.h
+++ b/src/yuzu/configuration/configure_dialog.h
@@ -17,7 +17,7 @@ class ConfigureDialog : public QDialog {
     Q_OBJECT
 
 public:
-    explicit ConfigureDialog(QWidget* parent, const HotkeyRegistry& registry);
+    explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry);
     ~ConfigureDialog() override;
 
     void applyConfiguration();
@@ -28,4 +28,5 @@ private:
     void PopulateSelectionList();
 
     std::unique_ptr<Ui::ConfigureDialog> ui;
+    HotkeyRegistry& registry;
 };
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 4116b6cd7..e48f4f5a3 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -28,25 +28,19 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
 ConfigureGeneral::~ConfigureGeneral() = default;
 
 void ConfigureGeneral::setConfiguration() {
-    ui->toggle_deepscan->setChecked(UISettings::values.gamedir_deepscan);
+    ui->toggle_deepscan->setChecked(UISettings::values.game_directory_deepscan);
     ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
     ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
     ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
     ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
-    ui->enable_nfc->setChecked(Settings::values.enable_nfc);
-}
-
-void ConfigureGeneral::PopulateHotkeyList(const HotkeyRegistry& registry) {
-    ui->widget->Populate(registry);
 }
 
 void ConfigureGeneral::applyConfiguration() {
-    UISettings::values.gamedir_deepscan = ui->toggle_deepscan->isChecked();
+    UISettings::values.game_directory_deepscan = ui->toggle_deepscan->isChecked();
     UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
     UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked();
     UISettings::values.theme =
         ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
 
     Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
-    Settings::values.enable_nfc = ui->enable_nfc->isChecked();
 }
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h
index 59738af40..df41d995b 100644
--- a/src/yuzu/configuration/configure_general.h
+++ b/src/yuzu/configuration/configure_general.h
@@ -20,7 +20,6 @@ public:
     explicit ConfigureGeneral(QWidget* parent = nullptr);
     ~ConfigureGeneral() override;
 
-    void PopulateHotkeyList(const HotkeyRegistry& registry);
     void applyConfiguration();
 
 private:
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index dff0ad5d0..1a5721fe7 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -71,26 +71,6 @@
       </widget>
      </item>
      <item>
-      <widget class="QGroupBox" name="EmulationGroupBox">
-       <property name="title">
-        <string>Emulation</string>
-       </property>
-       <layout class="QHBoxLayout" name="EmulationHorizontalLayout">
-        <item>
-         <layout class="QVBoxLayout" name="EmulationVerticalLayout">
-          <item>
-           <widget class="QCheckBox" name="enable_nfc">
-            <property name="text">
-             <string>Enable NFC</string>
-            </property>
-           </widget>
-          </item>
-         </layout>
-        </item>
-       </layout>
-      </widget>
-     </item>
-     <item>
       <widget class="QGroupBox" name="theme_group_box">
        <property name="title">
         <string>Theme</string>
@@ -118,22 +98,6 @@
       </widget>
      </item>
      <item>
-      <widget class="QGroupBox" name="HotKeysGroupBox">
-       <property name="title">
-        <string>Hotkeys</string>
-       </property>
-       <layout class="QHBoxLayout" name="HotKeysHorizontalLayout">
-        <item>
-         <layout class="QVBoxLayout" name="HotKeysVerticalLayout">
-          <item>
-           <widget class="GHotkeysDialog" name="widget" native="true"/>
-          </item>
-         </layout>
-        </item>
-       </layout>
-      </widget>
-     </item>
-     <item>
       <spacer name="verticalSpacer">
        <property name="orientation">
         <enum>Qt::Vertical</enum>
@@ -150,14 +114,6 @@
    </item>
   </layout>
  </widget>
- <customwidgets>
-  <customwidget>
-   <class>GHotkeysDialog</class>
-   <extends>QWidget</extends>
-   <header>hotkeys.h</header>
-   <container>1</container>
-  </customwidget>
- </customwidgets>
  <resources/>
  <connections/>
 </ui>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 8290b4384..dd1d67488 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -62,9 +62,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
         const QColor new_bg_color = QColorDialog::getColor(bg_color);
         if (!new_bg_color.isValid())
             return;
-        bg_color = new_bg_color;
-        ui->bg_button->setStyleSheet(
-            QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
+        UpdateBackgroundColorButton(new_bg_color);
     });
 }
 
@@ -75,11 +73,12 @@ void ConfigureGraphics::setConfiguration() {
         static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
     ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
     ui->frame_limit->setValue(Settings::values.frame_limit);
+    ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
     ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
-    bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
-                                Settings::values.bg_blue);
-    ui->bg_button->setStyleSheet(
-        QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
+    ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+    UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
+                                                 Settings::values.bg_blue));
 }
 
 void ConfigureGraphics::applyConfiguration() {
@@ -87,8 +86,21 @@ void ConfigureGraphics::applyConfiguration() {
         ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
     Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
     Settings::values.frame_limit = ui->frame_limit->value();
+    Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
     Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
+    Settings::values.use_asynchronous_gpu_emulation =
+        ui->use_asynchronous_gpu_emulation->isChecked();
     Settings::values.bg_red = static_cast<float>(bg_color.redF());
     Settings::values.bg_green = static_cast<float>(bg_color.greenF());
     Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
 }
+
+void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
+    // Keep the selection in bg_color; applyConfiguration() reads it from there.
+    bg_color = color;
+
+    // Show the colour as a solid swatch icon created at the button's size.
+    QPixmap swatch(ui->bg_button->size());
+    swatch.fill(bg_color);
+    ui->bg_button->setIcon(QIcon(swatch));
+}
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index d6ffc6fde..f2799822d 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -23,6 +23,8 @@ public:
 private:
     void setConfiguration();
 
+    void UpdateBackgroundColorButton(QColor color);
+
     std::unique_ptr<Ui::ConfigureGraphics> ui;
     QColor bg_color;
 };
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index e278cdd05..c6767e0ca 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -50,6 +50,13 @@
          </layout>
         </item>
         <item>
+         <widget class="QCheckBox" name="use_disk_shader_cache">
+          <property name="text">
+           <string>Use disk shader cache</string>
+          </property>
+         </widget>
+        </item>
+        <item>
          <widget class="QCheckBox" name="use_accurate_gpu_emulation">
           <property name="text">
            <string>Use accurate GPU emulation (slow)</string>
@@ -57,6 +64,13 @@
          </widget>
         </item>
         <item>
+         <widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
+          <property name="text">
+           <string>Use asynchronous GPU emulation</string>
+          </property>
+         </widget>
+        </item>
+        <item>
          <layout class="QHBoxLayout" name="horizontalLayout">
           <item>
            <widget class="QLabel" name="label">
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
new file mode 100644
index 000000000..bfb562535
--- /dev/null
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -0,0 +1,121 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <QMessageBox>
+#include <QStandardItemModel>
+#include "core/settings.h"
+#include "ui_configure_hotkeys.h"
+#include "yuzu/configuration/configure_hotkeys.h"
+#include "yuzu/hotkeys.h"
+#include "yuzu/util/sequence_dialog/sequence_dialog.h"
+
+ConfigureHotkeys::ConfigureHotkeys(QWidget* parent)
+    : QWidget(parent), ui(std::make_unique<Ui::ConfigureHotkeys>()) {
+    ui->setupUi(this);
+    setFocusPolicy(Qt::ClickFocus);
+
+    // Three columns are modelled, although the third one is hidden further down.
+    model = new QStandardItemModel(0, 3, this);
+    model->setHorizontalHeaderLabels({tr("Action"), tr("Hotkey"), tr("Context")});
+
+    ui->hotkey_list->setModel(model);
+    connect(ui->hotkey_list, &QTreeView::doubleClicked, this, &ConfigureHotkeys::Configure);
+
+    // TODO(Kloen): Make context configurable as well (hiding the column for now)
+    ui->hotkey_list->hideColumn(2);
+
+    ui->hotkey_list->setColumnWidth(0, 200);
+    ui->hotkey_list->resizeColumnToContents(1);
+}
+
+ConfigureHotkeys::~ConfigureHotkeys() = default;
+
+void ConfigureHotkeys::EmitHotkeysChanged() {
+    emit HotkeysChanged(GetUsedKeyList()); // payload: every key sequence currently in the model
+}
+
+QList<QKeySequence> ConfigureHotkeys::GetUsedKeyList() const {
+    QList<QKeySequence> used_keys; // key sequence (column 1) of every hotkey row
+    for (int group_row = 0; group_row < model->rowCount(); ++group_row) {
+        const QStandardItem* group_item = model->item(group_row, 0);
+        for (int child_row = 0; child_row < group_item->rowCount(); ++child_row) {
+            const QString shown_text = group_item->child(child_row, 1)->text();
+            used_keys.append(QKeySequence::fromString(shown_text, QKeySequence::NativeText));
+        }
+    }
+    return used_keys;
+}
+
+void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
+    // One top-level row per group; each hotkey is a read-only {action, key sequence} child.
+    for (const auto& [group_name, hotkeys] : registry.hotkey_groups) {
+        auto* group_item = new QStandardItem(group_name);
+        group_item->setEditable(false);
+        for (const auto& [action_name, hotkey] : hotkeys) {
+            auto* action_item = new QStandardItem(action_name);
+            auto* keyseq_item =
+                new QStandardItem(hotkey.keyseq.toString(QKeySequence::NativeText));
+            action_item->setEditable(false);
+            keyseq_item->setEditable(false);
+            group_item->appendRow({action_item, keyseq_item});
+        }
+        model->appendRow(group_item);
+    }
+    ui->hotkey_list->expandAll();
+}
+
+void ConfigureHotkeys::Configure(QModelIndex index) {
+    // Top-level rows are group headers; only their child rows hold a binding.
+    if (!index.parent().isValid())
+        return;
+
+    // Always edit column 1 (the key sequence), whichever cell was double-clicked.
+    index = index.sibling(index.row(), 1);
+    const auto previous_key = model->data(index);
+
+    // Stack-allocated so the dialog is destroyed on every exit path instead of leaking.
+    SequenceDialog hotkey_dialog;
+    const int return_code = hotkey_dialog.exec();
+    const auto key_sequence = hotkey_dialog.GetSequence();
+    if (return_code == QDialog::Rejected || key_sequence.isEmpty())
+        return;
+
+    if (IsUsedKey(key_sequence) && key_sequence != QKeySequence(previous_key.toString())) {
+        QMessageBox::critical(this, tr("Error in inputted key"),
+                              tr("You're using a key that's already bound."));
+    } else {
+        model->setData(index, key_sequence.toString(QKeySequence::NativeText));
+        EmitHotkeysChanged();
+    }
+}
+
+bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) {
+    return GetUsedKeyList().contains(key_sequence); // true if any listed hotkey has this binding
+}
+
+void ConfigureHotkeys::applyConfiguration(HotkeyRegistry& registry) {
+    // Copy every key sequence shown in the tree into the registry entry of the same name.
+    for (int group_row = 0; group_row < model->rowCount(); ++group_row) {
+        const QStandardItem* group_item = model->item(group_row, 0);
+        for (int child_row = 0; child_row < group_item->rowCount(); ++child_row) {
+            const QString action_text = group_item->child(child_row, 0)->text();
+            const QString keyseq_text = group_item->child(child_row, 1)->text();
+            for (auto& [group_name, hotkeys] : registry.hotkey_groups) {
+                if (group_name != group_item->text())
+                    continue;
+                for (auto& [action_name, hotkey] : hotkeys) {
+                    if (action_name == action_text)
+                        hotkey.keyseq = QKeySequence(keyseq_text);
+                }
+            }
+        }
+    }
+
+    registry.SaveHotkeys();
+    Settings::Apply();
+}
+
+void ConfigureHotkeys::retranslateUi() {
+    ui->retranslateUi(this); // forwards to the Ui::ConfigureHotkeys form object
+}
diff --git a/src/yuzu/configuration/configure_hotkeys.h b/src/yuzu/configuration/configure_hotkeys.h
new file mode 100644
index 000000000..cd203aad6
--- /dev/null
+++ b/src/yuzu/configuration/configure_hotkeys.h
@@ -0,0 +1,48 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <QWidget>
+#include "core/settings.h"
+
+namespace Ui {
+class ConfigureHotkeys;
+}
+
+class HotkeyRegistry;
+class QStandardItemModel;
+
+class ConfigureHotkeys : public QWidget {
+    Q_OBJECT
+
+public:
+    explicit ConfigureHotkeys(QWidget* parent = nullptr);
+    ~ConfigureHotkeys() override;
+
+    void applyConfiguration(HotkeyRegistry& registry); // copies the shown bindings into registry
+    void retranslateUi();
+
+    void EmitHotkeysChanged(); // emits HotkeysChanged with the keys currently listed
+
+    /**
+     * Populates the hotkey list widget using data from the provided registry.
+     * Called every time the Configure dialog is opened.
+     * @param registry The HotkeyRegistry whose data is used to populate the list.
+     */
+    void Populate(const HotkeyRegistry& registry);
+
+signals:
+    void HotkeysChanged(QList<QKeySequence> new_key_list);
+
+private:
+    void Configure(QModelIndex index); // double-click handler: prompts for a new key sequence
+    bool IsUsedKey(QKeySequence key_sequence);
+    QList<QKeySequence> GetUsedKeyList() const;
+
+    std::unique_ptr<Ui::ConfigureHotkeys> ui;
+
+    QStandardItemModel* model; // created in the constructor with this widget as its parent
+};
diff --git a/src/yuzu/configuration/configure_hotkeys.ui b/src/yuzu/configuration/configure_hotkeys.ui
new file mode 100644
index 000000000..0d0b70f38
--- /dev/null
+++ b/src/yuzu/configuration/configure_hotkeys.ui
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>ConfigureHotkeys</class>
+ <widget class="QWidget" name="ConfigureHotkeys">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>363</width>
+    <height>388</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Hotkey Settings</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout">
+   <item>
+    <layout class="QVBoxLayout" name="verticalLayout_2">
+     <item>
+      <widget class="QLabel" name="label_2">
+       <property name="text">
+        <string>Double-click on a binding to change it.</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QTreeView" name="hotkey_list">
+       <property name="editTriggers">
+        <set>QAbstractItemView::NoEditTriggers</set>
+       </property>
+       <property name="sortingEnabled">
+        <bool>false</bool>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>
+\ No newline at end of file
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index ba2b32c4f..c5a245ebe 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -7,6 +7,7 @@
 #include <utility>
 #include <QColorDialog>
 #include <QGridLayout>
+#include <QKeyEvent>
 #include <QMenu>
 #include <QMessageBox>
 #include <QTimer>
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index 7a53f6715..ade8d4435 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -11,17 +11,21 @@
 #include <string>
 
 #include <QDialog>
-#include <QKeyEvent>
 
 #include "common/param_package.h"
 #include "core/settings.h"
-#include "input_common/main.h"
 #include "ui_configure_input.h"
 
+class QKeyEvent;
 class QPushButton;
 class QString;
 class QTimer;
 
+namespace InputCommon::Polling {
+class DevicePoller;
+enum class DeviceType;
+} // namespace InputCommon::Polling
+
 namespace Ui {
 class ConfigureInputPlayer;
 }
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp
index e13d2eac8..022b94609 100644
--- a/src/yuzu/configuration/configure_per_general.cpp
+++ b/src/yuzu/configuration/configure_per_general.cpp
@@ -8,7 +8,6 @@
 
 #include <QHeaderView>
 #include <QMenu>
-#include <QMessageBox>
 #include <QStandardItemModel>
 #include <QString>
 #include <QTimer>
diff --git a/src/yuzu/configuration/configure_per_general.h b/src/yuzu/configuration/configure_per_general.h
index a4494446c..f8a7d5326 100644
--- a/src/yuzu/configuration/configure_per_general.h
+++ b/src/yuzu/configuration/configure_per_general.h
@@ -7,16 +7,16 @@
 #include <memory>
 #include <vector>
 
-#include <QKeyEvent>
+#include <QDialog>
 #include <QList>
-#include <QWidget>
 
 #include "core/file_sys/vfs_types.h"
 
-class QTreeView;
 class QGraphicsScene;
 class QStandardItem;
 class QStandardItemModel;
+class QTreeView;
+class QVBoxLayout;
 
 namespace Ui {
 class ConfigurePerGameGeneral;
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index 94e27349d..10645a2b3 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -2,23 +2,19 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <algorithm>
+#include <array>
+#include <chrono>
+#include <optional>
+
 #include <QFileDialog>
 #include <QGraphicsItem>
-#include <QGraphicsScene>
-#include <QHeaderView>
 #include <QMessageBox>
-#include <QStandardItemModel>
-#include <QTreeView>
-#include <QVBoxLayout>
 #include "common/assert.h"
 #include "common/file_util.h"
-#include "common/string_util.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "ui_configure_system.h"
 #include "yuzu/configuration/configure_system.h"
-#include "yuzu/util/limitable_input_dialog.h"
 
 namespace {
 constexpr std::array<int, 12> days_in_month = {{
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.h b/src/yuzu/configuration/configure_touchscreen_advanced.h
index 41cd255fb..3d0772c87 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.h
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.h
@@ -6,8 +6,6 @@
 
 #include <memory>
 #include <QDialog>
-#include <QWidget>
-#include "yuzu/configuration/config.h"
 
 namespace Ui {
 class ConfigureTouchscreenAdvanced;
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
deleted file mode 100644
index 209798521..000000000
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <QBoxLayout>
-#include <QComboBox>
-#include <QDebug>
-#include <QFileDialog>
-#include <QLabel>
-#include <QMouseEvent>
-#include <QPushButton>
-#include <QScrollArea>
-#include <QSpinBox>
-#include "common/vector_math.h"
-#include "core/core.h"
-#include "core/memory.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
-#include "video_core/textures/decoders.h"
-#include "video_core/textures/texture.h"
-#include "yuzu/debugger/graphics/graphics_surface.h"
-#include "yuzu/util/spinbox.h"
-
-static Tegra::Texture::TextureFormat ConvertToTextureFormat(
-    Tegra::RenderTargetFormat render_target_format) {
-    switch (render_target_format) {
-    case Tegra::RenderTargetFormat::RGBA8_UNORM:
-        return Tegra::Texture::TextureFormat::A8R8G8B8;
-    case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
-        return Tegra::Texture::TextureFormat::A2B10G10R10;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented RT format");
-        return Tegra::Texture::TextureFormat::A8R8G8B8;
-    }
-}
-
-SurfacePicture::SurfacePicture(QWidget* parent, GraphicsSurfaceWidget* surface_widget_)
-    : QLabel(parent), surface_widget(surface_widget_) {}
-
-SurfacePicture::~SurfacePicture() = default;
-
-void SurfacePicture::mousePressEvent(QMouseEvent* event) {
-    // Only do something while the left mouse button is held down
-    if (!(event->buttons() & Qt::LeftButton))
-        return;
-
-    if (pixmap() == nullptr)
-        return;
-
-    if (surface_widget)
-        surface_widget->Pick(event->x() * pixmap()->width() / width(),
-                             event->y() * pixmap()->height() / height());
-}
-
-void SurfacePicture::mouseMoveEvent(QMouseEvent* event) {
-    // We also want to handle the event if the user moves the mouse while holding down the LMB
-    mousePressEvent(event);
-}
-
-GraphicsSurfaceWidget::GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext> debug_context,
-                                             QWidget* parent)
-    : BreakPointObserverDock(debug_context, tr("Maxwell Surface Viewer"), parent),
-      surface_source(Source::RenderTarget0) {
-    setObjectName("MaxwellSurface");
-
-    surface_source_list = new QComboBox;
-    surface_source_list->addItem(tr("Render Target 0"));
-    surface_source_list->addItem(tr("Render Target 1"));
-    surface_source_list->addItem(tr("Render Target 2"));
-    surface_source_list->addItem(tr("Render Target 3"));
-    surface_source_list->addItem(tr("Render Target 4"));
-    surface_source_list->addItem(tr("Render Target 5"));
-    surface_source_list->addItem(tr("Render Target 6"));
-    surface_source_list->addItem(tr("Render Target 7"));
-    surface_source_list->addItem(tr("Z Buffer"));
-    surface_source_list->addItem(tr("Custom"));
-    surface_source_list->setCurrentIndex(static_cast<int>(surface_source));
-
-    surface_address_control = new CSpinBox;
-    surface_address_control->SetBase(16);
-    surface_address_control->SetRange(0, 0x7FFFFFFFFFFFFFFF);
-    surface_address_control->SetPrefix("0x");
-
-    unsigned max_dimension = 16384; // TODO: Find actual maximum
-
-    surface_width_control = new QSpinBox;
-    surface_width_control->setRange(0, max_dimension);
-
-    surface_height_control = new QSpinBox;
-    surface_height_control->setRange(0, max_dimension);
-
-    surface_picker_x_control = new QSpinBox;
-    surface_picker_x_control->setRange(0, max_dimension - 1);
-
-    surface_picker_y_control = new QSpinBox;
-    surface_picker_y_control->setRange(0, max_dimension - 1);
-
-    surface_format_control = new QComboBox;
-
-    // Color formats sorted by Maxwell texture format index
-    surface_format_control->addItem(tr("None"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("A8R8G8B8"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("DXT1"));
-    surface_format_control->addItem(tr("DXT23"));
-    surface_format_control->addItem(tr("DXT45"));
-    surface_format_control->addItem(tr("DXN1"));
-    surface_format_control->addItem(tr("DXN2"));
-
-    surface_info_label = new QLabel();
-    surface_info_label->setWordWrap(true);
-
-    surface_picture_label = new SurfacePicture(0, this);
-    surface_picture_label->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed);
-    surface_picture_label->setAlignment(Qt::AlignLeft | Qt::AlignTop);
-    surface_picture_label->setScaledContents(false);
-
-    auto scroll_area = new QScrollArea();
-    scroll_area->setBackgroundRole(QPalette::Dark);
-    scroll_area->setWidgetResizable(false);
-    scroll_area->setWidget(surface_picture_label);
-
-    save_surface = new QPushButton(QIcon::fromTheme("document-save"), tr("Save"));
-
-    // Connections
-    connect(this, &GraphicsSurfaceWidget::Update, this, &GraphicsSurfaceWidget::OnUpdate);
-    connect(surface_source_list,
-            static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
-            &GraphicsSurfaceWidget::OnSurfaceSourceChanged);
-    connect(surface_address_control, &CSpinBox::ValueChanged, this,
-            &GraphicsSurfaceWidget::OnSurfaceAddressChanged);
-    connect(surface_width_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfaceWidthChanged);
-    connect(surface_height_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfaceHeightChanged);
-    connect(surface_format_control,
-            static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
-            &GraphicsSurfaceWidget::OnSurfaceFormatChanged);
-    connect(surface_picker_x_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfacePickerXChanged);
-    connect(surface_picker_y_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfacePickerYChanged);
-    connect(save_surface, &QPushButton::clicked, this, &GraphicsSurfaceWidget::SaveSurface);
-
-    auto main_widget = new QWidget;
-    auto main_layout = new QVBoxLayout;
-    {
-        auto sub_layout = new QHBoxLayout;
-        sub_layout->addWidget(new QLabel(tr("Source:")));
-        sub_layout->addWidget(surface_source_list);
-        main_layout->addLayout(sub_layout);
-    }
-    {
-        auto sub_layout = new QHBoxLayout;
-        sub_layout->addWidget(new QLabel(tr("GPU Address:")));
-        sub_layout->addWidget(surface_address_control);
-        main_layout->addLayout(sub_layout);
-    }
-    {
-        auto sub_layout = new QHBoxLayout;
-        sub_layout->addWidget(new QLabel(tr("Width:")));
-        sub_layout->addWidget(surface_width_control);
-        main_layout->addLayout(sub_layout);
-    }
-    {
-        auto sub_layout = new QHBoxLayout;
-        sub_layout->addWidget(new QLabel(tr("Height:")));
-        sub_layout->addWidget(surface_height_control);
-        main_layout->addLayout(sub_layout);
-    }
-    {
-        auto sub_layout = new QHBoxLayout;
-        sub_layout->addWidget(new QLabel(tr("Format:")));
-        sub_layout->addWidget(surface_format_control);
-        main_layout->addLayout(sub_layout);
-    }
-    main_layout->addWidget(scroll_area);
-
-    auto info_layout = new QHBoxLayout;
-    {
-        auto xy_layout = new QVBoxLayout;
-        {
-            {
-                auto sub_layout = new QHBoxLayout;
-                sub_layout->addWidget(new QLabel(tr("X:")));
-                sub_layout->addWidget(surface_picker_x_control);
-                xy_layout->addLayout(sub_layout);
-            }
-            {
-                auto sub_layout = new QHBoxLayout;
-                sub_layout->addWidget(new QLabel(tr("Y:")));
-                sub_layout->addWidget(surface_picker_y_control);
-                xy_layout->addLayout(sub_layout);
-            }
-        }
-        info_layout->addLayout(xy_layout);
-        surface_info_label->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Minimum);
-        info_layout->addWidget(surface_info_label);
-    }
-    main_layout->addLayout(info_layout);
-
-    main_layout->addWidget(save_surface);
-    main_widget->setLayout(main_layout);
-    setWidget(main_widget);
-
-    // Load current data - TODO: Make sure this works when emulation is not running
-    if (debug_context && debug_context->at_breakpoint) {
-        emit Update();
-        widget()->setEnabled(debug_context->at_breakpoint);
-    } else {
-        widget()->setEnabled(false);
-    }
-}
-
-void GraphicsSurfaceWidget::OnBreakPointHit(Tegra::DebugContext::Event event, void* data) {
-    emit Update();
-    widget()->setEnabled(true);
-}
-
-void GraphicsSurfaceWidget::OnResumed() {
-    widget()->setEnabled(false);
-}
-
-void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) {
-    surface_source = static_cast<Source>(new_value);
-    emit Update();
-}
-
-void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) {
-    if (surface_address != new_value) {
-        surface_address = static_cast<Tegra::GPUVAddr>(new_value);
-
-        surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
-        emit Update();
-    }
-}
-
-void GraphicsSurfaceWidget::OnSurfaceWidthChanged(int new_value) {
-    if (surface_width != static_cast<unsigned>(new_value)) {
-        surface_width = static_cast<unsigned>(new_value);
-
-        surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
-        emit Update();
-    }
-}
-
-void GraphicsSurfaceWidget::OnSurfaceHeightChanged(int new_value) {
-    if (surface_height != static_cast<unsigned>(new_value)) {
-        surface_height = static_cast<unsigned>(new_value);
-
-        surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
-        emit Update();
-    }
-}
-
-void GraphicsSurfaceWidget::OnSurfaceFormatChanged(int new_value) {
-    if (surface_format != static_cast<Tegra::Texture::TextureFormat>(new_value)) {
-        surface_format = static_cast<Tegra::Texture::TextureFormat>(new_value);
-
-        surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
-        emit Update();
-    }
-}
-
-void GraphicsSurfaceWidget::OnSurfacePickerXChanged(int new_value) {
-    if (surface_picker_x != new_value) {
-        surface_picker_x = new_value;
-        Pick(surface_picker_x, surface_picker_y);
-    }
-}
-
-void GraphicsSurfaceWidget::OnSurfacePickerYChanged(int new_value) {
-    if (surface_picker_y != new_value) {
-        surface_picker_y = new_value;
-        Pick(surface_picker_x, surface_picker_y);
-    }
-}
-
-void GraphicsSurfaceWidget::Pick(int x, int y) {
-    surface_picker_x_control->setValue(x);
-    surface_picker_y_control->setValue(y);
-
-    if (x < 0 || x >= static_cast<int>(surface_width) || y < 0 ||
-        y >= static_cast<int>(surface_height)) {
-        surface_info_label->setText(tr("Pixel out of bounds"));
-        surface_info_label->setAlignment(Qt::AlignLeft | Qt::AlignVCenter);
-        return;
-    }
-
-    surface_info_label->setText(QString("Raw: <Unimplemented>\n(%1)").arg("<Unimplemented>"));
-    surface_info_label->setAlignment(Qt::AlignLeft | Qt::AlignVCenter);
-}
-
-void GraphicsSurfaceWidget::OnUpdate() {
-    auto& gpu = Core::System::GetInstance().GPU();
-
-    QPixmap pixmap;
-
-    switch (surface_source) {
-    case Source::RenderTarget0:
-    case Source::RenderTarget1:
-    case Source::RenderTarget2:
-    case Source::RenderTarget3:
-    case Source::RenderTarget4:
-    case Source::RenderTarget5:
-    case Source::RenderTarget6:
-    case Source::RenderTarget7: {
-        // TODO: Store a reference to the registers in the debug context instead of accessing them
-        // directly...
-
-        const auto& registers = gpu.Maxwell3D().regs;
-        const auto& rt = registers.rt[static_cast<std::size_t>(surface_source) -
-                                      static_cast<std::size_t>(Source::RenderTarget0)];
-
-        surface_address = rt.Address();
-        surface_width = rt.width;
-        surface_height = rt.height;
-        if (rt.format != Tegra::RenderTargetFormat::NONE) {
-            surface_format = ConvertToTextureFormat(rt.format);
-        }
-
-        break;
-    }
-
-    case Source::Custom: {
-        // Keep user-specified values
-        break;
-    }
-
-    default:
-        qDebug() << "Unknown surface source " << static_cast<int>(surface_source);
-        break;
-    }
-
-    surface_address_control->SetValue(surface_address);
-    surface_width_control->setValue(surface_width);
-    surface_height_control->setValue(surface_height);
-    surface_format_control->setCurrentIndex(static_cast<int>(surface_format));
-
-    if (surface_address == 0) {
-        surface_picture_label->hide();
-        surface_info_label->setText(tr("(invalid surface address)"));
-        surface_info_label->setAlignment(Qt::AlignCenter);
-        surface_picker_x_control->setEnabled(false);
-        surface_picker_y_control->setEnabled(false);
-        save_surface->setEnabled(false);
-        return;
-    }
-
-    // TODO: Implement a good way to visualize alpha components!
-
-    QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
-    std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
-
-    // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
-    // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
-    auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
-        *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width,
-        surface_height, 1U);
-
-    auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
-                                                      surface_width, surface_height);
-
-    surface_picture_label->show();
-
-    for (unsigned int y = 0; y < surface_height; ++y) {
-        for (unsigned int x = 0; x < surface_width; ++x) {
-            Math::Vec4<u8> color;
-            color[0] = texture_data[x + y * surface_width + 0];
-            color[1] = texture_data[x + y * surface_width + 1];
-            color[2] = texture_data[x + y * surface_width + 2];
-            color[3] = texture_data[x + y * surface_width + 3];
-            decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a()));
-        }
-    }
-
-    pixmap = QPixmap::fromImage(decoded_image);
-    surface_picture_label->setPixmap(pixmap);
-    surface_picture_label->resize(pixmap.size());
-
-    // Update the info with pixel data
-    surface_picker_x_control->setEnabled(true);
-    surface_picker_y_control->setEnabled(true);
-    Pick(surface_picker_x, surface_picker_y);
-
-    // Enable saving the converted pixmap to file
-    save_surface->setEnabled(true);
-}
-
-void GraphicsSurfaceWidget::SaveSurface() {
-    QString png_filter = tr("Portable Network Graphic (*.png)");
-    QString bin_filter = tr("Binary data (*.bin)");
-
-    QString selectedFilter;
-    QString filename = QFileDialog::getSaveFileName(
-        this, tr("Save Surface"),
-        QString("texture-0x%1.png").arg(QString::number(surface_address, 16)),
-        QString("%1;;%2").arg(png_filter, bin_filter), &selectedFilter);
-
-    if (filename.isEmpty()) {
-        // If the user canceled the dialog, don't save anything.
-        return;
-    }
-
-    if (selectedFilter == png_filter) {
-        const QPixmap* pixmap = surface_picture_label->pixmap();
-        ASSERT_MSG(pixmap != nullptr, "No pixmap set");
-
-        QFile file(filename);
-        file.open(QIODevice::WriteOnly);
-        if (pixmap)
-            pixmap->save(&file, "PNG");
-    } else if (selectedFilter == bin_filter) {
-        auto& gpu = Core::System::GetInstance().GPU();
-        std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
-
-        const u8* buffer = Memory::GetPointer(*address);
-        ASSERT_MSG(buffer != nullptr, "Memory not accessible");
-
-        QFile file(filename);
-        file.open(QIODevice::WriteOnly);
-        int size = surface_width * surface_height * Tegra::Texture::BytesPerPixel(surface_format);
-        QByteArray data(reinterpret_cast<const char*>(buffer), size);
-        file.write(data);
-    } else {
-        UNREACHABLE_MSG("Unhandled filter selected");
-    }
-}
diff --git a/src/yuzu/debugger/graphics/graphics_surface.h b/src/yuzu/debugger/graphics/graphics_surface.h
deleted file mode 100644
index 323e39d94..000000000
--- a/src/yuzu/debugger/graphics/graphics_surface.h
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <QLabel>
-#include <QPushButton>
-#include "video_core/memory_manager.h"
-#include "video_core/textures/texture.h"
-#include "yuzu/debugger/graphics/graphics_breakpoint_observer.h"
-
-class QComboBox;
-class QSpinBox;
-class CSpinBox;
-
-class GraphicsSurfaceWidget;
-
-class SurfacePicture : public QLabel {
-    Q_OBJECT
-
-public:
-    explicit SurfacePicture(QWidget* parent = nullptr,
-                            GraphicsSurfaceWidget* surface_widget = nullptr);
-    ~SurfacePicture() override;
-
-protected slots:
-    void mouseMoveEvent(QMouseEvent* event) override;
-    void mousePressEvent(QMouseEvent* event) override;
-
-private:
-    GraphicsSurfaceWidget* surface_widget;
-};
-
-class GraphicsSurfaceWidget : public BreakPointObserverDock {
-    Q_OBJECT
-
-    using Event = Tegra::DebugContext::Event;
-
-    enum class Source {
-        RenderTarget0 = 0,
-        RenderTarget1 = 1,
-        RenderTarget2 = 2,
-        RenderTarget3 = 3,
-        RenderTarget4 = 4,
-        RenderTarget5 = 5,
-        RenderTarget6 = 6,
-        RenderTarget7 = 7,
-        ZBuffer = 8,
-        Custom = 9,
-    };
-
-public:
-    explicit GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext> debug_context,
-                                   QWidget* parent = nullptr);
-    void Pick(int x, int y);
-
-public slots:
-    void OnSurfaceSourceChanged(int new_value);
-    void OnSurfaceAddressChanged(qint64 new_value);
-    void OnSurfaceWidthChanged(int new_value);
-    void OnSurfaceHeightChanged(int new_value);
-    void OnSurfaceFormatChanged(int new_value);
-    void OnSurfacePickerXChanged(int new_value);
-    void OnSurfacePickerYChanged(int new_value);
-    void OnUpdate();
-
-signals:
-    void Update();
-
-private:
-    void OnBreakPointHit(Tegra::DebugContext::Event event, void* data) override;
-    void OnResumed() override;
-
-    void SaveSurface();
-
-    QComboBox* surface_source_list;
-    CSpinBox* surface_address_control;
-    QSpinBox* surface_width_control;
-    QSpinBox* surface_height_control;
-    QComboBox* surface_format_control;
-
-    SurfacePicture* surface_picture_label;
-    QSpinBox* surface_picker_x_control;
-    QSpinBox* surface_picker_y_control;
-    QLabel* surface_info_label;
-    QPushButton* save_surface;
-
-    Source surface_source;
-    Tegra::GPUVAddr surface_address;
-    unsigned surface_width;
-    unsigned surface_height;
-    Tegra::Texture::TextureFormat surface_format;
-    int surface_picker_x = 0;
-    int surface_picker_y = 0;
-};
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 8b30e0a85..86e03e46d 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -7,6 +7,7 @@
 #include <QMouseEvent>
 #include <QPainter>
 #include <QString>
+#include <QTimer>
 #include "common/common_types.h"
 #include "common/microprofile.h"
 #include "yuzu/debugger/profiler.h"
diff --git a/src/yuzu/debugger/profiler.h b/src/yuzu/debugger/profiler.h
index eae1e9e3c..8e69fdb06 100644
--- a/src/yuzu/debugger/profiler.h
+++ b/src/yuzu/debugger/profiler.h
@@ -4,10 +4,11 @@
 
 #pragma once
 
-#include <QAbstractItemModel>
-#include <QDockWidget>
-#include <QTimer>
-#include "common/microprofile.h"
+#include <QWidget>
+
+class QAction;
+class QHideEvent;
+class QShowEvent;
 
 class MicroProfileDialog : public QWidget {
     Q_OBJECT
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 0c0864742..593bb681f 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -13,7 +13,6 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/timer.h"
 #include "core/hle/kernel/wait_object.h"
 #include "core/memory.h"
 
@@ -82,9 +81,8 @@ QString WaitTreeText::GetText() const {
     return text;
 }
 
-WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) {
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
-
+WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table)
+    : mutex_address(mutex_address) {
     mutex_value = Memory::Read32(mutex_address);
     owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
     owner = handle_table.Get<Kernel::Thread>(owner_handle);
@@ -155,8 +153,6 @@ std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitO
     switch (object.GetHandleType()) {
     case Kernel::HandleType::ReadableEvent:
         return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object));
-    case Kernel::HandleType::Timer:
-        return std::make_unique<WaitTreeTimer>(static_cast<const Kernel::Timer&>(object));
     case Kernel::HandleType::Thread:
         return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object));
     default:
@@ -238,6 +234,9 @@ QString WaitTreeThread::GetText() const {
     case Kernel::ThreadStatus::WaitMutex:
         status = tr("waiting for mutex");
         break;
+    case Kernel::ThreadStatus::WaitCondVar:
+        status = tr("waiting for condition variable");
+        break;
     case Kernel::ThreadStatus::WaitArb:
         status = tr("waiting for address arbiter");
         break;
@@ -273,6 +272,7 @@ QColor WaitTreeThread::GetColor() const {
     case Kernel::ThreadStatus::WaitSynchAll:
     case Kernel::ThreadStatus::WaitSynchAny:
     case Kernel::ThreadStatus::WaitMutex:
+    case Kernel::ThreadStatus::WaitCondVar:
     case Kernel::ThreadStatus::WaitArb:
         return QColor(Qt::GlobalColor::red);
     case Kernel::ThreadStatus::Dormant:
@@ -319,7 +319,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
 
     const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
     if (mutex_wait_address != 0) {
-        list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address));
+        const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
+        list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table));
     } else {
         list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
     }
@@ -348,23 +349,6 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeEvent::GetChildren() const {
     return list;
 }
 
-WaitTreeTimer::WaitTreeTimer(const Kernel::Timer& object) : WaitTreeWaitObject(object) {}
-WaitTreeTimer::~WaitTreeTimer() = default;
-
-std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeTimer::GetChildren() const {
-    std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren());
-
-    const auto& timer = static_cast<const Kernel::Timer&>(object);
-
-    list.push_back(std::make_unique<WaitTreeText>(
-        tr("reset type = %1").arg(GetResetTypeQString(timer.GetResetType()))));
-    list.push_back(
-        std::make_unique<WaitTreeText>(tr("initial delay = %1").arg(timer.GetInitialDelay())));
-    list.push_back(
-        std::make_unique<WaitTreeText>(tr("interval delay = %1").arg(timer.GetIntervalDelay())));
-    return list;
-}
-
 WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list)
     : thread_list(list) {}
 WaitTreeThreadList::~WaitTreeThreadList() = default;
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index e639ef412..62886609d 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -17,10 +17,10 @@
 class EmuThread;
 
 namespace Kernel {
+class HandleTable;
 class ReadableEvent;
 class WaitObject;
 class Thread;
-class Timer;
 } // namespace Kernel
 
 class WaitTreeThread;
@@ -73,7 +73,7 @@ public:
 class WaitTreeMutexInfo : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
-    explicit WaitTreeMutexInfo(VAddr mutex_address);
+    explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table);
     ~WaitTreeMutexInfo() override;
 
     QString GetText() const override;
@@ -150,15 +150,6 @@ public:
     std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
 };
 
-class WaitTreeTimer : public WaitTreeWaitObject {
-    Q_OBJECT
-public:
-    explicit WaitTreeTimer(const Kernel::Timer& object);
-    ~WaitTreeTimer() override;
-
-    std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
-};
-
 class WaitTreeThreadList : public WaitTreeExpandableItem {
     Q_OBJECT
 public:
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index c0e3c5fa9..b0ca766ec 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -18,6 +18,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/file_sys/patch_manager.h"
+#include "core/file_sys/registered_cache.h"
 #include "yuzu/compatibility_list.h"
 #include "yuzu/game_list.h"
 #include "yuzu/game_list_p.h"
@@ -193,8 +194,9 @@ void GameList::onFilterCloseClicked() {
     main_window->filterBarSetChecked(false);
 }
 
-GameList::GameList(FileSys::VirtualFilesystem vfs, GMainWindow* parent)
-    : QWidget{parent}, vfs(std::move(vfs)) {
+GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvider* provider,
+                   GMainWindow* parent)
+    : QWidget{parent}, vfs(std::move(vfs)), provider(provider) {
     watcher = new QFileSystemWatcher(this);
     connect(watcher, &QFileSystemWatcher::directoryChanged, this, &GameList::RefreshGameDirectory);
 
@@ -329,6 +331,8 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
     QMenu context_menu;
     QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
     QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location"));
+    QAction* open_transferable_shader_cache =
+        context_menu.addAction(tr("Open Transferable Shader Cache"));
     context_menu.addSeparator();
     QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
     QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
@@ -344,6 +348,8 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
             [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); });
     connect(open_lfs_location, &QAction::triggered,
             [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); });
+    connect(open_transferable_shader_cache, &QAction::triggered,
+            [&]() { emit OpenTransferableShaderCacheRequested(program_id); });
     connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); });
     connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); });
     connect(navigate_to_gamedb_entry, &QAction::triggered,
@@ -428,7 +434,8 @@ void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) {
 
     emit ShouldCancelWorker();
 
-    GameListWorker* worker = new GameListWorker(vfs, dir_path, deep_scan, compatibility_list);
+    GameListWorker* worker =
+        new GameListWorker(vfs, provider, dir_path, deep_scan, compatibility_list);
 
     connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection);
     connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating,
@@ -460,9 +467,10 @@ void GameList::LoadInterfaceLayout() {
 const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca", "xci", "nsp"};
 
 void GameList::RefreshGameDirectory() {
-    if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) {
+    if (!UISettings::values.game_directory_path.isEmpty() && current_worker != nullptr) {
         LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
         search_field->clear();
-        PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        PopulateAsync(UISettings::values.game_directory_path,
+                      UISettings::values.game_directory_deepscan);
     }
 }
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b317eb2fc..56007eef8 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -26,8 +26,9 @@ class GameListSearchField;
 class GMainWindow;
 
 namespace FileSys {
+class ManualContentProvider;
 class VfsFilesystem;
-}
+} // namespace FileSys
 
 enum class GameListOpenTarget {
     SaveData,
@@ -47,7 +48,8 @@ public:
         COLUMN_COUNT, // Number of columns
     };
 
-    explicit GameList(std::shared_ptr<FileSys::VfsFilesystem> vfs, GMainWindow* parent = nullptr);
+    explicit GameList(std::shared_ptr<FileSys::VfsFilesystem> vfs,
+                      FileSys::ManualContentProvider* provider, GMainWindow* parent = nullptr);
     ~GameList() override;
 
     void clearFilter();
@@ -66,6 +68,7 @@ signals:
     void GameChosen(QString game_path);
     void ShouldCancelWorker();
     void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
+    void OpenTransferableShaderCacheRequested(u64 program_id);
     void DumpRomFSRequested(u64 program_id, const std::string& game_path);
     void CopyTIDRequested(u64 program_id);
     void NavigateToGamedbEntryRequested(u64 program_id,
@@ -85,6 +88,7 @@ private:
     void RefreshGameDirectory();
 
     std::shared_ptr<FileSys::VfsFilesystem> vfs;
+    FileSys::ManualContentProvider* provider;
     GameListSearchField* search_field;
     GMainWindow* main_window = nullptr;
     QVBoxLayout* layout = nullptr;
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp
index b37710f59..8687e7c5a 100644
--- a/src/yuzu/game_list_worker.cpp
+++ b/src/yuzu/game_list_worker.cpp
@@ -12,12 +12,15 @@
 
 #include "common/common_paths.h"
 #include "common/file_util.h"
+#include "core/core.h"
+#include "core/file_sys/card_image.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
 #include "core/file_sys/mode.h"
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/patch_manager.h"
 #include "core/file_sys/registered_cache.h"
+#include "core/file_sys/submission_package.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
 #include "yuzu/compatibility_list.h"
@@ -119,20 +122,25 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri
 }
 } // Anonymous namespace
 
-GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs, QString dir_path, bool deep_scan,
-                               const CompatibilityList& compatibility_list)
-    : vfs(std::move(vfs)), dir_path(std::move(dir_path)), deep_scan(deep_scan),
+GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs,
+                               FileSys::ManualContentProvider* provider, QString dir_path,
+                               bool deep_scan, const CompatibilityList& compatibility_list)
+    : vfs(std::move(vfs)), provider(provider), dir_path(std::move(dir_path)), deep_scan(deep_scan),
       compatibility_list(compatibility_list) {}
 
 GameListWorker::~GameListWorker() = default;
 
-void GameListWorker::AddInstalledTitlesToGameList() {
-    const auto cache = Service::FileSystem::GetUnionContents();
-    const auto installed_games = cache.ListEntriesFilter(FileSys::TitleType::Application,
-                                                         FileSys::ContentRecordType::Program);
+void GameListWorker::AddTitlesToGameList() {
+    const auto& cache = dynamic_cast<FileSys::ContentProviderUnion&>(
+        Core::System::GetInstance().GetContentProvider());
+    const auto installed_games = cache.ListEntriesFilterOrigin(
+        std::nullopt, FileSys::TitleType::Application, FileSys::ContentRecordType::Program);
 
-    for (const auto& game : installed_games) {
-        const auto file = cache.GetEntryUnparsed(game);
+    for (const auto& [slot, game] : installed_games) {
+        if (slot == FileSys::ContentProviderUnionSlot::FrontendManual)
+            continue;
+
+        const auto file = cache.GetEntryUnparsed(game.title_id, game.type);
         std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(file);
         if (!loader)
             continue;
@@ -150,45 +158,13 @@ void GameListWorker::AddInstalledTitlesToGameList() {
         emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id,
                                           compatibility_list, patch));
     }
-
-    const auto control_data = cache.ListEntriesFilter(FileSys::TitleType::Application,
-                                                      FileSys::ContentRecordType::Control);
-
-    for (const auto& entry : control_data) {
-        auto nca = cache.GetEntry(entry);
-        if (nca != nullptr) {
-            nca_control_map.insert_or_assign(entry.title_id, std::move(nca));
-        }
-    }
 }
 
-void GameListWorker::FillControlMap(const std::string& dir_path) {
-    const auto nca_control_callback = [this](u64* num_entries_out, const std::string& directory,
-                                             const std::string& virtual_name) -> bool {
-        if (stop_processing) {
-            // Breaks the callback loop
-            return false;
-        }
-
-        const std::string physical_name = directory + DIR_SEP + virtual_name;
-        const QFileInfo file_info(QString::fromStdString(physical_name));
-        if (!file_info.isDir() && file_info.suffix() == QStringLiteral("nca")) {
-            auto nca =
-                std::make_unique<FileSys::NCA>(vfs->OpenFile(physical_name, FileSys::Mode::Read));
-            if (nca->GetType() == FileSys::NCAContentType::Control) {
-                const u64 title_id = nca->GetTitleId();
-                nca_control_map.insert_or_assign(title_id, std::move(nca));
-            }
-        }
-        return true;
-    };
-
-    FileUtil::ForeachDirectoryEntry(nullptr, dir_path, nca_control_callback);
-}
-
-void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion) {
-    const auto callback = [this, recursion](u64* num_entries_out, const std::string& directory,
-                                            const std::string& virtual_name) -> bool {
+void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path,
+                                    unsigned int recursion) {
+    const auto callback = [this, target, recursion](u64* num_entries_out,
+                                                    const std::string& directory,
+                                                    const std::string& virtual_name) -> bool {
         if (stop_processing) {
             // Breaks the callback loop.
             return false;
@@ -198,7 +174,8 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
         const bool is_dir = FileUtil::IsDirectory(physical_name);
         if (!is_dir &&
             (HasSupportedFileExtension(physical_name) || IsExtractedNCAMain(physical_name))) {
-            auto loader = Loader::GetLoader(vfs->OpenFile(physical_name, FileSys::Mode::Read));
+            const auto file = vfs->OpenFile(physical_name, FileSys::Mode::Read);
+            auto loader = Loader::GetLoader(file);
             if (!loader) {
                 return true;
             }
@@ -209,31 +186,42 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
                 return true;
             }
 
-            std::vector<u8> icon;
-            const auto res1 = loader->ReadIcon(icon);
-
             u64 program_id = 0;
             const auto res2 = loader->ReadProgramId(program_id);
 
-            std::string name = " ";
-            const auto res3 = loader->ReadTitle(name);
+            if (target == ScanTarget::FillManualContentProvider) {
+                if (res2 == Loader::ResultStatus::Success && file_type == Loader::FileType::NCA) {
+                    provider->AddEntry(FileSys::TitleType::Application,
+                                       FileSys::GetCRTypeFromNCAType(FileSys::NCA{file}.GetType()),
+                                       program_id, file);
+                } else if (res2 == Loader::ResultStatus::Success &&
+                           (file_type == Loader::FileType::XCI ||
+                            file_type == Loader::FileType::NSP)) {
+                    const auto nsp = file_type == Loader::FileType::NSP
+                                         ? std::make_shared<FileSys::NSP>(file)
+                                         : FileSys::XCI{file}.GetSecurePartitionNSP();
+                    for (const auto& title : nsp->GetNCAs()) {
+                        for (const auto& entry : title.second) {
+                            provider->AddEntry(entry.first.first, entry.first.second, title.first,
+                                               entry.second->GetBaseFile());
+                        }
+                    }
+                }
+            } else {
+                std::vector<u8> icon;
+                const auto res1 = loader->ReadIcon(icon);
 
-            const FileSys::PatchManager patch{program_id};
+                std::string name = " ";
+                const auto res3 = loader->ReadTitle(name);
 
-            if (res1 != Loader::ResultStatus::Success && res3 != Loader::ResultStatus::Success &&
-                res2 == Loader::ResultStatus::Success) {
-                // Use from metadata pool.
-                if (nca_control_map.find(program_id) != nca_control_map.end()) {
-                    const auto& nca = nca_control_map[program_id];
-                    GetMetadataFromControlNCA(patch, *nca, icon, name);
-                }
-            }
+                const FileSys::PatchManager patch{program_id};
 
-            emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id,
-                                              compatibility_list, patch));
+                emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id,
+                                                  compatibility_list, patch));
+            }
         } else if (is_dir && recursion > 0) {
             watch_list.append(QString::fromStdString(physical_name));
-            AddFstEntriesToGameList(physical_name, recursion - 1);
+            ScanFileSystem(target, physical_name, recursion - 1);
         }
 
         return true;
@@ -245,10 +233,11 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
 void GameListWorker::run() {
     stop_processing = false;
     watch_list.append(dir_path);
-    FillControlMap(dir_path.toStdString());
-    AddInstalledTitlesToGameList();
-    AddFstEntriesToGameList(dir_path.toStdString(), deep_scan ? 256 : 0);
-    nca_control_map.clear();
+    provider->ClearAllEntries();
+    ScanFileSystem(ScanTarget::FillManualContentProvider, dir_path.toStdString(),
+                   deep_scan ? 256 : 0);
+    AddTitlesToGameList();
+    ScanFileSystem(ScanTarget::PopulateGameList, dir_path.toStdString(), deep_scan ? 256 : 0);
     emit Finished(watch_list);
 }
 
diff --git a/src/yuzu/game_list_worker.h b/src/yuzu/game_list_worker.h
index 0e42d0bde..7c3074af9 100644
--- a/src/yuzu/game_list_worker.h
+++ b/src/yuzu/game_list_worker.h
@@ -33,7 +33,8 @@ class GameListWorker : public QObject, public QRunnable {
     Q_OBJECT
 
 public:
-    GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs, QString dir_path, bool deep_scan,
+    GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs,
+                   FileSys::ManualContentProvider* provider, QString dir_path, bool deep_scan,
                    const CompatibilityList& compatibility_list);
     ~GameListWorker() override;
 
@@ -58,12 +59,17 @@ signals:
     void Finished(QStringList watch_list);
 
 private:
-    void AddInstalledTitlesToGameList();
-    void FillControlMap(const std::string& dir_path);
-    void AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion = 0);
+    void AddTitlesToGameList();
+
+    enum class ScanTarget {
+        FillManualContentProvider,
+        PopulateGameList,
+    };
+
+    void ScanFileSystem(ScanTarget target, const std::string& dir_path, unsigned int recursion = 0);
 
     std::shared_ptr<FileSys::VfsFilesystem> vfs;
-    std::map<u64, std::unique_ptr<FileSys::NCA>> nca_control_map;
+    FileSys::ManualContentProvider* provider;
     QStringList watch_list;
     QString dir_path;
     bool deep_scan;
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp
index dce399774..4582e7f21 100644
--- a/src/yuzu/hotkeys.cpp
+++ b/src/yuzu/hotkeys.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <map>
 #include <QKeySequence>
 #include <QShortcut>
 #include <QTreeWidgetItem>
@@ -13,47 +12,32 @@
 HotkeyRegistry::HotkeyRegistry() = default;
 HotkeyRegistry::~HotkeyRegistry() = default;
 
-void HotkeyRegistry::LoadHotkeys() {
-    // Make sure NOT to use a reference here because it would become invalid once we call
-    // beginGroup()
-    for (auto shortcut : UISettings::values.shortcuts) {
-        const QStringList cat = shortcut.first.split('/');
-        Q_ASSERT(cat.size() >= 2);
-
-        // RegisterHotkey assigns default keybindings, so use old values as default parameters
-        Hotkey& hk = hotkey_groups[cat[0]][cat[1]];
-        if (!shortcut.second.first.isEmpty()) {
-            hk.keyseq = QKeySequence::fromString(shortcut.second.first);
-            hk.context = static_cast<Qt::ShortcutContext>(shortcut.second.second);
-        }
-        if (hk.shortcut)
-            hk.shortcut->setKey(hk.keyseq);
-    }
-}
-
 void HotkeyRegistry::SaveHotkeys() {
     UISettings::values.shortcuts.clear();
     for (const auto& group : hotkey_groups) {
         for (const auto& hotkey : group.second) {
-            UISettings::values.shortcuts.emplace_back(
-                UISettings::Shortcut(group.first + '/' + hotkey.first,
-                                     UISettings::ContextualShortcut(hotkey.second.keyseq.toString(),
-                                                                    hotkey.second.context)));
+            UISettings::values.shortcuts.push_back(
+                {hotkey.first, group.first,
+                 UISettings::ContextualShortcut(hotkey.second.keyseq.toString(),
+                                                hotkey.second.context)});
         }
     }
 }
 
-void HotkeyRegistry::RegisterHotkey(const QString& group, const QString& action,
-                                    const QKeySequence& default_keyseq,
-                                    Qt::ShortcutContext default_context) {
-    auto& hotkey_group = hotkey_groups[group];
-    if (hotkey_group.find(action) != hotkey_group.end()) {
-        return;
+void HotkeyRegistry::LoadHotkeys() {
+    // Make sure NOT to use a reference here because it would become invalid once we call
+    // beginGroup()
+    for (auto shortcut : UISettings::values.shortcuts) {
+        Hotkey& hk = hotkey_groups[shortcut.group][shortcut.name];
+        if (!shortcut.shortcut.first.isEmpty()) {
+            hk.keyseq = QKeySequence::fromString(shortcut.shortcut.first, QKeySequence::NativeText);
+            hk.context = static_cast<Qt::ShortcutContext>(shortcut.shortcut.second);
+        }
+        if (hk.shortcut) {
+            hk.shortcut->disconnect();
+            hk.shortcut->setKey(hk.keyseq);
+        }
     }
-
-    auto& hotkey_action = hotkey_groups[group][action];
-    hotkey_action.keyseq = default_keyseq;
-    hotkey_action.context = default_context;
 }
 
 QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action, QWidget* widget) {
@@ -65,24 +49,11 @@ QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action
     return hk.shortcut;
 }
 
-GHotkeysDialog::GHotkeysDialog(QWidget* parent) : QWidget(parent) {
-    ui.setupUi(this);
+QKeySequence HotkeyRegistry::GetKeySequence(const QString& group, const QString& action) {
+    return hotkey_groups[group][action].keyseq;
 }
 
-void GHotkeysDialog::Populate(const HotkeyRegistry& registry) {
-    for (const auto& group : registry.hotkey_groups) {
-        QTreeWidgetItem* toplevel_item = new QTreeWidgetItem(QStringList(group.first));
-        for (const auto& hotkey : group.second) {
-            QStringList columns;
-            columns << hotkey.first << hotkey.second.keyseq.toString();
-            QTreeWidgetItem* item = new QTreeWidgetItem(columns);
-            toplevel_item->addChild(item);
-        }
-        ui.treeWidget->addTopLevelItem(toplevel_item);
-    }
-    // TODO: Make context configurable as well (hiding the column for now)
-    ui.treeWidget->setColumnCount(2);
-
-    ui.treeWidget->resizeColumnToContents(0);
-    ui.treeWidget->resizeColumnToContents(1);
+Qt::ShortcutContext HotkeyRegistry::GetShortcutContext(const QString& group,
+                                                       const QString& action) {
+    return hotkey_groups[group][action].context;
 }
diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h
index f38e6c002..4f526dc7e 100644
--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <map>
-#include "ui_hotkeys.h"
 
 class QDialog;
 class QKeySequence;
@@ -14,7 +13,7 @@ class QShortcut;
 
 class HotkeyRegistry final {
 public:
-    friend class GHotkeysDialog;
+    friend class ConfigureHotkeys;
 
     explicit HotkeyRegistry();
     ~HotkeyRegistry();
@@ -49,22 +48,27 @@ public:
     QShortcut* GetHotkey(const QString& group, const QString& action, QWidget* widget);
 
     /**
-     * Register a hotkey.
+     * Returns the key sequence stored for a hotkey, suitable for QAction::setShortcut.
      *
-     * @param group General group this hotkey belongs to (e.g. "Main Window", "Debugger")
-     * @param action Name of the action (e.g. "Start Emulation", "Load Image")
-     * @param default_keyseq Default key sequence to assign if the hotkey wasn't present in the
-     *                       settings file before
-     * @param default_context Default context to assign if the hotkey wasn't present in the settings
-     *                        file before
-     * @warning Both the group and action strings will be displayed in the hotkey settings dialog
+     * @param group  General group this hotkey belongs to (e.g. "Main Window", "Debugger").
+     * @param action Name of the action (e.g. "Start Emulation", "Load Image").
+     */
+    QKeySequence GetKeySequence(const QString& group, const QString& action);
+
+    /**
+     * Returns the Qt::ShortcutContext stored for a hotkey, suitable for passing to
+     * QAction::setShortcutContext.
+     *
+     * @param group  General group this shortcut context belongs to (e.g. "Main Window",
+     * "Debugger").
+     * @param action Name of the action (e.g. "Start Emulation", "Load Image").
      */
-    void RegisterHotkey(const QString& group, const QString& action,
-                        const QKeySequence& default_keyseq = {},
-                        Qt::ShortcutContext default_context = Qt::WindowShortcut);
+    Qt::ShortcutContext GetShortcutContext(const QString& group, const QString& action);
 
 private:
     struct Hotkey {
+        Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
+
         QKeySequence keyseq;
         QShortcut* shortcut = nullptr;
         Qt::ShortcutContext context = Qt::WindowShortcut;
@@ -75,15 +79,3 @@ private:
 
     HotkeyGroupMap hotkey_groups;
 };
-
-class GHotkeysDialog : public QWidget {
-    Q_OBJECT
-
-public:
-    explicit GHotkeysDialog(QWidget* parent = nullptr);
-
-    void Populate(const HotkeyRegistry& registry);
-
-private:
-    Ui::hotkeys ui;
-};
diff --git a/src/yuzu/hotkeys.ui b/src/yuzu/hotkeys.ui
deleted file mode 100644
index 050fe064e..000000000
--- a/src/yuzu/hotkeys.ui
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<ui version="4.0">
- <class>hotkeys</class>
- <widget class="QWidget" name="hotkeys">
-  <property name="geometry">
-   <rect>
-    <x>0</x>
-    <y>0</y>
-    <width>363</width>
-    <height>388</height>
-   </rect>
-  </property>
-  <property name="windowTitle">
-   <string>Hotkey Settings</string>
-  </property>
-  <layout class="QVBoxLayout" name="verticalLayout">
-   <item>
-    <widget class="QTreeWidget" name="treeWidget">
-     <property name="selectionBehavior">
-      <enum>QAbstractItemView::SelectItems</enum>
-     </property>
-     <property name="headerHidden">
-      <bool>false</bool>
-     </property>
-     <column>
-      <property name="text">
-       <string>Action</string>
-      </property>
-     </column>
-     <column>
-      <property name="text">
-       <string>Hotkey</string>
-      </property>
-     </column>
-     <column>
-      <property name="text">
-       <string>Context</string>
-      </property>
-     </column>
-    </widget>
-   </item>
-  </layout>
- </widget>
- <resources/>
- <connections/>
-</ui>
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 907aac4f1..4e2d988cd 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -43,6 +43,7 @@ QProgressBar {
 }
 QProgressBar::chunk {
   background-color: #0ab9e6;
+  width: 1px;
 })";
 
 constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"(
@@ -53,7 +54,8 @@ QProgressBar {
   padding: 2px;
 }
 QProgressBar::chunk {
- background-color: #ff3c28;
+  background-color: #ff3c28;
+  width: 1px;
 })";
 
 constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"(
@@ -190,7 +192,12 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
     }
 
     // update labels and progress bar
-    ui->stage->setText(stage_translations[stage].arg(value).arg(total));
+    if (stage == VideoCore::LoadCallbackStage::Decompile ||
+        stage == VideoCore::LoadCallbackStage::Build) {
+        ui->stage->setText(stage_translations[stage].arg(value).arg(total));
+    } else {
+        ui->stage->setText(stage_translations[stage]);
+    }
     ui->value->setText(estimate);
     ui->progress_bar->setValue(static_cast<int>(value));
     previous_time = now;
diff --git a/src/yuzu/loading_screen.ui b/src/yuzu/loading_screen.ui
index a67d273fd..820b47536 100644
--- a/src/yuzu/loading_screen.ui
+++ b/src/yuzu/loading_screen.ui
@@ -132,7 +132,7 @@ border-radius: 15px;
 font: 75 15pt &quot;Arial&quot;;</string>
           </property>
           <property name="text">
-           <string>Stage 1 of 2. Estimate Time 5m 4s</string>
+           <string>Estimated Time 5m 4s</string>
           </property>
          </widget>
         </item>
@@ -146,6 +146,9 @@ font: 75 15pt &quot;Arial&quot;;</string>
         <property name="text">
          <string/>
         </property>
+        <property name="alignment">
+         <set>Qt::AlignCenter</set>
+        </property>
         <property name="margin">
          <number>30</number>
         </property>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ae3b49709..bdee44b04 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,9 +11,11 @@
 #include "applets/profile_select.h"
 #include "applets/software_keyboard.h"
 #include "applets/web_browser.h"
+#include "configuration/configure_input.h"
 #include "configuration/configure_per_general.h"
 #include "core/file_sys/vfs.h"
 #include "core/file_sys/vfs_real.h"
+#include "core/frontend/scope_acquire_window_context.h"
 #include "core/hle/service/acc/profile_manager.h"
 #include "core/hle/service/am/applets/applets.h"
 #include "core/hle/service/hid/controllers/npad.h"
@@ -35,14 +37,20 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include <glad/glad.h>
 
 #define QT_NO_OPENGL
+#include <QClipboard>
+#include <QDesktopServices>
 #include <QDesktopWidget>
 #include <QDialogButtonBox>
 #include <QFile>
 #include <QFileDialog>
+#include <QInputDialog>
 #include <QMessageBox>
+#include <QProgressBar>
+#include <QProgressDialog>
+#include <QShortcut>
+#include <QStatusBar>
 #include <QtConcurrent/QtConcurrent>
-#include <QtGui>
-#include <QtWidgets>
+
 #include <fmt/format.h>
 #include "common/common_paths.h"
 #include "common/detached_tasks.h"
@@ -53,11 +61,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "common/microprofile.h"
 #include "common/scm_rev.h"
 #include "common/scope_exit.h"
-#include "common/string_util.h"
 #include "common/telemetry.h"
 #include "core/core.h"
 #include "core/crypto/key_manager.h"
-#include "core/file_sys/bis_factory.h"
 #include "core/file_sys/card_image.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
@@ -69,7 +75,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "core/frontend/applets/software_keyboard.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/filesystem/filesystem.h"
-#include "core/hle/service/filesystem/fsp_ldr.h"
 #include "core/hle/service/nfp/nfp.h"
 #include "core/hle/service/sm/sm.h"
 #include "core/loader/loader.h"
@@ -85,7 +90,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "yuzu/configuration/configure_dialog.h"
 #include "yuzu/debugger/console.h"
 #include "yuzu/debugger/graphics/graphics_breakpoints.h"
-#include "yuzu/debugger/graphics/graphics_surface.h"
 #include "yuzu/debugger/profiler.h"
 #include "yuzu/debugger/wait_tree.h"
 #include "yuzu/discord.h"
@@ -166,7 +170,8 @@ static void InitializeLogging() {
 
 GMainWindow::GMainWindow()
     : config(new Config()), emu_thread(nullptr),
-      vfs(std::make_shared<FileSys::RealVfsFilesystem>()) {
+      vfs(std::make_shared<FileSys::RealVfsFilesystem>()),
+      provider(std::make_unique<FileSys::ManualContentProvider>()) {
     InitializeLogging();
 
     debug_context = Tegra::DebugContext::Construct();
@@ -198,13 +203,18 @@ GMainWindow::GMainWindow()
                        .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc));
     show();
 
+    Core::System::GetInstance().SetContentProvider(
+        std::make_unique<FileSys::ContentProviderUnion>());
+    Core::System::GetInstance().RegisterContentProvider(
+        FileSys::ContentProviderUnionSlot::FrontendManual, provider.get());
+    Service::FileSystem::CreateFactories(*vfs);
+
     // Gen keys if necessary
     OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning);
 
-    // Necessary to load titles from nand in gamelist.
-    Service::FileSystem::CreateFactories(*vfs);
     game_list->LoadCompatibilityList();
-    game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+    game_list->PopulateAsync(UISettings::values.game_directory_path,
+                             UISettings::values.game_directory_deepscan);
 
     // Show one-time "callout" messages to the user
     ShowTelemetryCallout();
@@ -338,6 +348,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view
                 .arg(QString::fromStdString(std::to_string(key_code))));
     };
 
+    QMessageBox::information(
+        this, tr("Exit"),
+        tr("To exit the web application, use the game provided controls to select exit, select the "
+           "'Exit Web Applet' option in the menu bar, or press the 'Enter' key."));
+
     bool running_exit_check = false;
     while (!finished) {
         QApplication::processEvents();
@@ -409,7 +424,7 @@ void GMainWindow::InitializeWidgets() {
     render_window = new GRenderWindow(this, emu_thread.get());
     render_window->hide();
 
-    game_list = new GameList(vfs, this);
+    game_list = new GameList(vfs, provider.get(), this);
     ui.horizontalLayout->addWidget(game_list);
 
     loading_screen = new LoadingScreen(this);
@@ -468,11 +483,6 @@ void GMainWindow::InitializeDebugWidgets() {
     graphicsBreakpointsWidget->hide();
     debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());
 
-    graphicsSurfaceWidget = new GraphicsSurfaceWidget(debug_context, this);
-    addDockWidget(Qt::RightDockWidgetArea, graphicsSurfaceWidget);
-    graphicsSurfaceWidget->hide();
-    debug_menu->addAction(graphicsSurfaceWidget->toggleViewAction());
-
     waitTreeWidget = new WaitTreeWidget(this);
     addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget);
     waitTreeWidget->hide();
@@ -504,32 +514,34 @@ void GMainWindow::InitializeRecentFileMenuActions() {
 }
 
 void GMainWindow::InitializeHotkeys() {
-    hotkey_registry.RegisterHotkey("Main Window", "Load File", QKeySequence::Open);
-    hotkey_registry.RegisterHotkey("Main Window", "Start Emulation");
-    hotkey_registry.RegisterHotkey("Main Window", "Continue/Pause", QKeySequence(Qt::Key_F4));
-    hotkey_registry.RegisterHotkey("Main Window", "Restart", QKeySequence(Qt::Key_F5));
-    hotkey_registry.RegisterHotkey("Main Window", "Fullscreen", QKeySequence::FullScreen);
-    hotkey_registry.RegisterHotkey("Main Window", "Exit Fullscreen", QKeySequence(Qt::Key_Escape),
-                                   Qt::ApplicationShortcut);
-    hotkey_registry.RegisterHotkey("Main Window", "Toggle Speed Limit", QKeySequence("CTRL+Z"),
-                                   Qt::ApplicationShortcut);
-    hotkey_registry.RegisterHotkey("Main Window", "Increase Speed Limit", QKeySequence("+"),
-                                   Qt::ApplicationShortcut);
-    hotkey_registry.RegisterHotkey("Main Window", "Decrease Speed Limit", QKeySequence("-"),
-                                   Qt::ApplicationShortcut);
-    hotkey_registry.RegisterHotkey("Main Window", "Load Amiibo", QKeySequence(Qt::Key_F2),
-                                   Qt::ApplicationShortcut);
-    hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot",
-                                   QKeySequence(QKeySequence::Print));
-
     hotkey_registry.LoadHotkeys();
 
+    ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence("Main Window", "Load File"));
+    ui.action_Load_File->setShortcutContext(
+        hotkey_registry.GetShortcutContext("Main Window", "Load File"));
+
+    ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence("Main Window", "Exit yuzu"));
+    ui.action_Exit->setShortcutContext(
+        hotkey_registry.GetShortcutContext("Main Window", "Exit yuzu"));
+
+    ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence("Main Window", "Stop Emulation"));
+    ui.action_Stop->setShortcutContext(
+        hotkey_registry.GetShortcutContext("Main Window", "Stop Emulation"));
+
+    ui.action_Show_Filter_Bar->setShortcut(
+        hotkey_registry.GetKeySequence("Main Window", "Toggle Filter Bar"));
+    ui.action_Show_Filter_Bar->setShortcutContext(
+        hotkey_registry.GetShortcutContext("Main Window", "Toggle Filter Bar"));
+
+    ui.action_Show_Status_Bar->setShortcut(
+        hotkey_registry.GetKeySequence("Main Window", "Toggle Status Bar"));
+    ui.action_Show_Status_Bar->setShortcutContext(
+        hotkey_registry.GetShortcutContext("Main Window", "Toggle Status Bar"));
+
     connect(hotkey_registry.GetHotkey("Main Window", "Load File", this), &QShortcut::activated,
             this, &GMainWindow::OnMenuLoadFile);
-    connect(hotkey_registry.GetHotkey("Main Window", "Start Emulation", this),
-            &QShortcut::activated, this, &GMainWindow::OnStartGame);
-    connect(hotkey_registry.GetHotkey("Main Window", "Continue/Pause", this), &QShortcut::activated,
-            this, [&] {
+    connect(hotkey_registry.GetHotkey("Main Window", "Continue/Pause Emulation", this),
+            &QShortcut::activated, this, [&] {
                 if (emulation_running) {
                     if (emu_thread->IsRunning()) {
                         OnPauseGame();
@@ -538,8 +550,8 @@ void GMainWindow::InitializeHotkeys() {
                     }
                 }
             });
-    connect(hotkey_registry.GetHotkey("Main Window", "Restart", this), &QShortcut::activated, this,
-            [this] {
+    connect(hotkey_registry.GetHotkey("Main Window", "Restart Emulation", this),
+            &QShortcut::activated, this, [this] {
                 if (!Core::System::GetInstance().IsPoweredOn())
                     return;
                 BootGame(QString(game_path));
@@ -560,7 +572,10 @@ void GMainWindow::InitializeHotkeys() {
                 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
                 UpdateStatusBar();
             });
-    constexpr u16 SPEED_LIMIT_STEP = 5;
+    // TODO: Remove this comment/static whenever the next major release of
+    // MSVC occurs and we make it a requirement (see:
+    // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
+    static constexpr u16 SPEED_LIMIT_STEP = 5;
     connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this),
             &QShortcut::activated, this, [&] {
                 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -587,6 +602,12 @@ void GMainWindow::InitializeHotkeys() {
                     OnCaptureScreenshot();
                 }
             });
+    connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this),
+            &QShortcut::activated, this, [&] {
+                Settings::values.use_docked_mode = !Settings::values.use_docked_mode;
+                OnDockedModeChanged(!Settings::values.use_docked_mode,
+                                    Settings::values.use_docked_mode);
+            });
 }
 
 void GMainWindow::SetDefaultUIGeometry() {
@@ -631,6 +652,8 @@ void GMainWindow::RestoreUIState() {
 void GMainWindow::ConnectWidgetEvents() {
     connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile);
     connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder);
+    connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this,
+            &GMainWindow::OnTransferableShaderCacheOpenFile);
     connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS);
     connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID);
     connect(game_list, &GameList::NavigateToGamedbEntryRequested, this,
@@ -675,7 +698,6 @@ void GMainWindow::ConnectMenuEvents() {
             &GMainWindow::ToggleWindowMode);
     connect(ui.action_Display_Dock_Widget_Headers, &QAction::triggered, this,
             &GMainWindow::OnDisplayTitleBars);
-    ui.action_Show_Filter_Bar->setShortcut(tr("CTRL+F"));
     connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar);
     connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
 
@@ -747,13 +769,15 @@ bool GMainWindow::LoadROM(const QString& filename) {
         ShutdownGame();
 
     render_window->InitRenderTarget();
-    render_window->MakeCurrent();
 
-    if (!gladLoadGL()) {
-        QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"),
-                              tr("Your GPU may not support OpenGL 4.3, or you do not "
-                                 "have the latest graphics driver."));
-        return false;
+    {
+        Core::Frontend::ScopeAcquireWindowContext acquire_context{*render_window};
+        if (!gladLoadGL()) {
+            QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"),
+                                  tr("Your GPU may not support OpenGL 4.3, or you do not "
+                                     "have the latest graphics driver."));
+            return false;
+        }
     }
 
     QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions();
@@ -794,8 +818,6 @@ bool GMainWindow::LoadROM(const QString& filename) {
                "wiki</a>. This message will not be shown again."));
     }
 
-    render_window->DoneCurrent();
-
     if (result != Core::System::ResultStatus::Success) {
         switch (result) {
         case Core::System::ResultStatus::ErrorGetLoader:
@@ -845,7 +867,7 @@ bool GMainWindow::LoadROM(const QString& filename) {
     }
     game_path = filename;
 
-    Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
+    system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt");
     return true;
 }
 
@@ -886,6 +908,9 @@ void GMainWindow::BootGame(const QString& filename) {
     connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
             &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
 
+    connect(emu_thread.get(), &EmuThread::LoadProgress, loading_screen,
+            &LoadingScreen::OnLoadProgress, Qt::QueuedConnection);
+
     // Update the GUI
     if (ui.action_Single_Window_Mode->isChecked()) {
         game_list->hide();
@@ -1062,6 +1087,39 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
     QDesktopServices::openUrl(QUrl::fromLocalFile(qpath));
 }
 
+void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
+    ASSERT(program_id != 0);
+    // Cache file: <ShaderDir>opengl<DIR_SEP>transferable<DIR_SEP><program id, 16 hex digits>.bin
+    const QString shader_dir =
+        QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir));
+    const QString cache_folder = shader_dir + "opengl" + DIR_SEP + "transferable";
+    const QString cache_file_name =
+        QString::fromStdString(fmt::format("{:016X}.bin", program_id));
+    const QString cache_file = cache_folder + DIR_SEP + cache_file_name;
+
+    // Bail out with a warning when no cache file exists for this title.
+    if (!QFile::exists(cache_file)) {
+        QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"),
+                             tr("A shader cache for this title does not exist."));
+        return;
+    }
+
+    // Explorer on Windows can open a folder with a given file preselected. Elsewhere we simply
+    // open the transferable shader cache folder, without highlighting the cache file of the
+    // selected game.
+#if defined(Q_OS_WIN)
+    QStringList explorer_args;
+    const bool is_directory = QFileInfo(cache_file).isDir();
+    if (!is_directory) {
+        explorer_args << QStringLiteral("/select,");
+    }
+    explorer_args << QDir::toNativeSeparators(cache_file);
+    QProcess::startDetached(QStringLiteral("explorer"), explorer_args);
+#else
+    QDesktopServices::openUrl(QUrl::fromLocalFile(cache_folder));
+#endif
+}
+
 static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {
     std::size_t out = 0;
 
@@ -1121,7 +1179,7 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
         return;
     }
 
-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();
     const auto romfs_title_id = SelectRomFSDumpTarget(installed, program_id);
 
     if (!romfs_title_id) {
@@ -1221,8 +1279,8 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) {
 
         const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
         if (reload) {
-            game_list->PopulateAsync(UISettings::values.gamedir,
-                                     UISettings::values.gamedir_deepscan);
+            game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                     UISettings::values.game_directory_deepscan);
         }
 
         config->Save();
@@ -1310,7 +1368,8 @@ void GMainWindow::OnMenuInstallToNAND() {
     const auto success = [this]() {
         QMessageBox::information(this, tr("Successfully Installed"),
                                  tr("The file was successfully installed."));
-        game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                 UISettings::values.game_directory_deepscan);
     };
 
     const auto failed = [this]() {
@@ -1437,8 +1496,8 @@ void GMainWindow::OnMenuInstallToNAND() {
 void GMainWindow::OnMenuSelectGameListRoot() {
     QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
     if (!dir_path.isEmpty()) {
-        UISettings::values.gamedir = dir_path;
-        game_list->PopulateAsync(dir_path, UISettings::values.gamedir_deepscan);
+        UISettings::values.game_directory_path = dir_path;
+        game_list->PopulateAsync(dir_path, UISettings::values.game_directory_deepscan);
     }
 }
 
@@ -1460,7 +1519,8 @@ void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target)
                                                                       : FileUtil::UserPath::NANDDir,
                               dir_path.toStdString());
         Service::FileSystem::CreateFactories(*vfs);
-        game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                 UISettings::values.game_directory_deepscan);
     }
 }
 
@@ -1604,6 +1664,7 @@ void GMainWindow::OnConfigure() {
     auto result = configureDialog.exec();
     if (result == QDialog::Accepted) {
         configureDialog.applyConfiguration();
+        InitializeHotkeys();
         if (UISettings::values.theme != old_theme)
             UpdateUITheme();
         if (UISettings::values.enable_discord_presence != old_discord_presence)
@@ -1611,8 +1672,8 @@ void GMainWindow::OnConfigure() {
 
         const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
         if (reload) {
-            game_list->PopulateAsync(UISettings::values.gamedir,
-                                     UISettings::values.gamedir_deepscan);
+            game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                     UISettings::values.game_directory_deepscan);
         }
 
         config->Save();
@@ -1681,12 +1742,16 @@ void GMainWindow::OnToggleFilterBar() {
 
 void GMainWindow::OnCaptureScreenshot() {
     OnPauseGame();
-    const QString path =
-        QFileDialog::getSaveFileName(this, tr("Capture Screenshot"),
-                                     UISettings::values.screenshot_path, tr("PNG Image (*.png)"));
-    if (!path.isEmpty()) {
-        UISettings::values.screenshot_path = QFileInfo(path).path();
-        render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
+    QFileDialog png_dialog(this, tr("Capture Screenshot"), UISettings::values.screenshot_path,
+                           tr("PNG Image (*.png)"));
+    png_dialog.setAcceptMode(QFileDialog::AcceptSave);
+    png_dialog.setDefaultSuffix("png");
+    if (png_dialog.exec()) {
+        const QString path = png_dialog.selectedFiles().first();
+        if (!path.isEmpty()) {
+            UISettings::values.screenshot_path = QFileInfo(path).path();
+            render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
+        }
     }
     OnStartGame();
 }
@@ -1858,18 +1923,19 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
     Service::FileSystem::CreateFactories(*vfs);
 
     if (behavior == ReinitializeKeyBehavior::Warning) {
-        game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                 UISettings::values.game_directory_deepscan);
     }
 }
 
-std::optional<u64> GMainWindow::SelectRomFSDumpTarget(
-    const FileSys::RegisteredCacheUnion& installed, u64 program_id) {
+std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed,
+                                                      u64 program_id) {
     const auto dlc_entries =
         installed.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data);
-    std::vector<FileSys::RegisteredCacheEntry> dlc_match;
+    std::vector<FileSys::ContentProviderEntry> dlc_match;
     dlc_match.reserve(dlc_entries.size());
     std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
-                 [&program_id, &installed](const FileSys::RegisteredCacheEntry& entry) {
+                 [&program_id, &installed](const FileSys::ContentProviderEntry& entry) {
                      return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id &&
                             installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success;
                  });
@@ -2055,6 +2121,9 @@ int main(int argc, char* argv[]) {
     GMainWindow main_window;
     // After settings have been loaded by GMainWindow, apply the filter
     main_window.show();
+
+    Settings::LogSettings();
+
     int result = app.exec();
     detached_tasks.WaitForAllTasks();
     return result;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 080484995..ce5045819 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -23,7 +23,6 @@ class EmuThread;
 class GameList;
 class GImageInfo;
 class GraphicsBreakPointsWidget;
-class GraphicsSurfaceWidget;
 class GRenderWindow;
 class LoadingScreen;
 class MicroProfileDialog;
@@ -37,7 +36,8 @@ struct SoftwareKeyboardParameters;
 } // namespace Core::Frontend
 
 namespace FileSys {
-class RegisteredCacheUnion;
+class ContentProvider;
+class ManualContentProvider;
 class VfsFilesystem;
 } // namespace FileSys
 
@@ -120,7 +120,6 @@ private:
     void InitializeWidgets();
     void InitializeDebugWidgets();
     void InitializeRecentFileMenuActions();
-    void InitializeHotkeys();
 
     void SetDefaultUIGeometry();
     void RestoreUIState();
@@ -176,6 +175,7 @@ private slots:
     /// Called whenever a user selects a game in the game list widget.
     void OnGameListLoadFile(QString game_path);
     void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
+    void OnTransferableShaderCacheOpenFile(u64 program_id);
     void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
     void OnGameListCopyTID(u64 program_id);
     void OnGameListNavigateToGamedbEntry(u64 program_id,
@@ -195,6 +195,7 @@ private slots:
     void OnAbout();
     void OnToggleFilterBar();
     void OnDisplayTitleBars(bool);
+    void InitializeHotkeys();
     void ToggleFullscreen();
     void ShowFullscreen();
     void HideFullscreen();
@@ -204,7 +205,7 @@ private slots:
     void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
 
 private:
-    std::optional<u64> SelectRomFSDumpTarget(const FileSys::RegisteredCacheUnion&, u64 program_id);
+    std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
     void UpdateStatusBar();
 
     Ui::MainWindow ui;
@@ -232,12 +233,12 @@ private:
 
     // FS
     std::shared_ptr<FileSys::VfsFilesystem> vfs;
+    std::unique_ptr<FileSys::ManualContentProvider> provider;
 
     // Debugger panes
     ProfilerWidget* profilerWidget;
     MicroProfileDialog* microProfileDialog;
     GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
-    GraphicsSurfaceWidget* graphicsSurfaceWidget;
     WaitTreeWidget* waitTreeWidget;
 
     QAction* actions_recent_files[max_recent_files_item];
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/ui_settings.cpp
index a314493fc..4bdc302e0 100644
--- a/src/yuzu/ui_settings.cpp
+++ b/src/yuzu/ui_settings.cpp
@@ -12,5 +12,4 @@ const Themes themes{{
 }};
 
 Values values = {};
-
 } // namespace UISettings
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/ui_settings.h
index 82aaeedb0..dbd318e20 100644
--- a/src/yuzu/ui_settings.h
+++ b/src/yuzu/ui_settings.h
@@ -15,7 +15,12 @@
 namespace UISettings {
 
 using ContextualShortcut = std::pair<QString, int>;
-using Shortcut = std::pair<QString, ContextualShortcut>;
+
+struct Shortcut {
+    QString name;  // presumably the action name, e.g. "Load File" — TODO confirm against users
+    QString group; // presumably the owning group, e.g. "Main Window" — TODO confirm
+    ContextualShortcut shortcut; // (QString, int) pair; likely key sequence text + context
+};
 
 using Themes = std::array<std::pair<const char*, const char*>, 2>;
 extern const Themes themes;
@@ -50,8 +55,8 @@ struct Values {
     QString roms_path;
     QString symbols_path;
     QString screenshot_path;
-    QString gamedir;
-    bool gamedir_deepscan;
+    QString game_directory_path;
+    bool game_directory_deepscan;
     QStringList recent_files;
 
     QString theme;
diff --git a/src/yuzu/util/sequence_dialog/sequence_dialog.cpp b/src/yuzu/util/sequence_dialog/sequence_dialog.cpp
new file mode 100644
index 000000000..d3edf6ec3
--- /dev/null
+++ b/src/yuzu/util/sequence_dialog/sequence_dialog.cpp
@@ -0,0 +1,37 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <QDialogButtonBox>
+#include <QKeySequenceEdit>
+#include <QVBoxLayout>
+#include "yuzu/util/sequence_dialog/sequence_dialog.h"
+
+SequenceDialog::SequenceDialog(QWidget* parent) : QDialog(parent) {
+    setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint); // no context-help button
+    setWindowTitle(tr("Enter a hotkey"));
+    key_sequence = new QKeySequenceEdit;
+    auto* const button_box =
+        new QDialogButtonBox(QDialogButtonBox::Ok | QDialogButtonBox::Cancel, Qt::Horizontal);
+    button_box->setCenterButtons(true);
+    connect(button_box, &QDialogButtonBox::accepted, this, &QDialog::accept);
+    connect(button_box, &QDialogButtonBox::rejected, this, &QDialog::reject);
+    auto* const column = new QVBoxLayout(this); // key editor first, Ok/Cancel row after it
+    column->addWidget(key_sequence);
+    column->addWidget(button_box);
+}
+
+SequenceDialog::~SequenceDialog() = default;
+
+QKeySequence SequenceDialog::GetSequence() const {
+    const QKeySequence entered = key_sequence->keySequence(); // may contain several keys
+    return QKeySequence(entered[0]); // keep only the first key; any further keys are ignored
+}
+
+bool SequenceDialog::focusNextPrevChild(bool next) {
+    return false; // always false, whatever `next` is; presumably frees Tab for hotkeys — TODO confirm
+}
+
+void SequenceDialog::closeEvent(QCloseEvent*) {
+    reject(); // closing the window takes the same path as the Cancel button (QDialog::reject)
+}
diff --git a/src/yuzu/util/sequence_dialog/sequence_dialog.h b/src/yuzu/util/sequence_dialog/sequence_dialog.h
new file mode 100644
index 000000000..969c77740
--- /dev/null
+++ b/src/yuzu/util/sequence_dialog/sequence_dialog.h
@@ -0,0 +1,24 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <QDialog>
+
+class QKeySequenceEdit;
+
+class SequenceDialog : public QDialog { // "Enter a hotkey" dialog built around a QKeySequenceEdit
+    Q_OBJECT
+
+public:
+    explicit SequenceDialog(QWidget* parent = nullptr);
+    ~SequenceDialog() override;
+
+    QKeySequence GetSequence() const; // first key of the entered sequence only
+    void closeEvent(QCloseEvent*) override; // closing the window rejects the dialog
+
+private:
+    QKeySequenceEdit* key_sequence; // editor widget, created in the constructor
+    bool focusNextPrevChild(bool next) override; // implementation always returns false
+};
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7a77f76e8..f24cc77fe 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -319,7 +319,6 @@ void Config::ReadValues() {
 
     // System
     Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
-    Settings::values.enable_nfc = sdl2_config->GetBoolean("System", "enable_nfc", true);
     const auto size = sdl2_config->GetInteger("System", "users_size", 0);
 
     Settings::values.current_user = std::clamp<int>(
@@ -346,23 +345,28 @@ void Config::ReadValues() {
 
     // Renderer
     Settings::values.resolution_factor =
-        (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
+        static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
     Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
     Settings::values.frame_limit =
         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
+    Settings::values.use_disk_shader_cache =
+        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
     Settings::values.use_accurate_gpu_emulation =
         sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
+    Settings::values.use_asynchronous_gpu_emulation =
+        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 
-    Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
-    Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);
-    Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0);
+    Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
+    Settings::values.bg_green =
+        static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
+    Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
 
     // Audio
     Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
     Settings::values.enable_audio_stretching =
         sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
     Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
-    Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1);
+    Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
 
     Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
 
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index ba51a4a51..6538af098 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -110,10 +110,18 @@ use_frame_limit =
 # 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
 frame_limit =
 
+# Whether to use disk based shader cache
+# 0 (default): Off, 1 : On
+use_disk_shader_cache =
+
 # Whether to use accurate GPU emulation
 # 0 (default): Off (fast), 1 : On (slow)
 use_accurate_gpu_emulation =
 
+# Whether to use asynchronous GPU emulation
+# 0 (default): Off (slow), 1 : On (fast)
+use_asynchronous_gpu_emulation =
+
 # The clear color for the renderer. What shows up on the sides of the bottom screen.
 # Must be in range of 0.0-1.0. Defaults to 1.0 for all.
 bg_red =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index d246389fa..68a176032 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -166,16 +166,16 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
 }
 
 EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
-    InputCommon::Init();
-
-    SDL_SetMainReady();
-
     // Initialize the window
     if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
         LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
         exit(1);
     }
 
+    InputCommon::Init();
+
+    SDL_SetMainReady();
+
     SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
     SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
     SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
@@ -226,16 +226,15 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
     SDL_GL_SetSwapInterval(false);
     LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
              Common::g_scm_desc);
+    Settings::LogSettings();
 
     DoneCurrent();
 }
 
 EmuWindow_SDL2::~EmuWindow_SDL2() {
-    InputCommon::SDL::CloseSDLJoysticks();
+    InputCommon::Shutdown();
     SDL_GL_DeleteContext(gl_context);
     SDL_Quit();
-
-    InputCommon::Shutdown();
 }
 
 void EmuWindow_SDL2::SwapBuffers() {
@@ -292,7 +291,6 @@ void EmuWindow_SDL2::PollEvents() {
             is_open = false;
             break;
         default:
-            InputCommon::SDL::HandleGameControllerEvent(event);
             break;
         }
     }
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 806127b12..7ea4a1b18 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -28,10 +28,12 @@
 #include "core/loader/loader.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
+#include "video_core/renderer_base.h"
 #include "yuzu_cmd/config.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"
 
 #include <getopt.h>
+#include "core/file_sys/registered_cache.h"
 #ifndef _MSC_VER
 #include <unistd.h>
 #endif
@@ -113,9 +115,9 @@ int main(int argc, char** argv) {
     };
 
     while (optind < argc) {
-        char arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
+        int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
         if (arg != -1) {
-            switch (arg) {
+            switch (static_cast<char>(arg)) {
             case 'g':
                 errno = 0;
                 gdb_port = strtoul(optarg, &endarg, 0);
@@ -177,6 +179,7 @@ int main(int argc, char** argv) {
     }
 
     Core::System& system{Core::System::GetInstance()};
+    system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>());
     system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>());
     Service::FileSystem::CreateFactories(*system.GetFilesystem());
 
@@ -215,7 +218,9 @@ int main(int argc, char** argv) {
         }
     }
 
-    Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
+    system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
+
+    system.Renderer().Rasterizer().LoadDiskResources();
 
     while (emu_window->IsOpen()) {
         system.RunLoop();