13 files changed, 866 insertions, 344 deletions
diff --git a/.travis-build.sh b/.travis-build.sh
index bb4e6fc47..fc5a5f8b2 100755
--- a/.travis-build.sh
+++ b/.travis-build.sh
@@ -52,8 +52,8 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
     export Qt5_DIR=$(brew --prefix)/opt/qt5
 
     mkdir build && cd build
-    cmake .. -DUSE_SYSTEM_CURL=ON -GXcode
-    xcodebuild -configuration Release
+    cmake .. -DUSE_SYSTEM_CURL=ON -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h" -DCMAKE_BUILD_TYPE=Release
+    make -j4
 
     ctest -VV -C Release
 fi
diff --git a/.travis-upload.sh b/.travis-upload.sh
index 8c1fa21c5..edf195f7d 100755
--- a/.travis-upload.sh
+++ b/.travis-upload.sh
@@ -16,8 +16,8 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
     COMPRESSION_FLAGS="-czvf"
     mkdir "$REV_NAME"
 
-    cp build/src/citra/Release/citra "$REV_NAME"
-    cp -r build/src/citra_qt/Release/citra-qt.app "$REV_NAME"
+    cp build/src/citra/citra "$REV_NAME"
+    cp -r build/src/citra_qt/citra-qt.app "$REV_NAME"
 
     # move qt libs into app bundle for deployment
     $(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/citra-qt.app"
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 3ed619991..2618da18c 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -29,6 +29,7 @@ set(SRCS
             file_sys/ncch_container.cpp
             file_sys/path_parser.cpp
             file_sys/savedata_archive.cpp
+            file_sys/title_metadata.cpp
             frontend/camera/blank_camera.cpp
             frontend/camera/factory.cpp
             frontend/camera/interface.cpp
diff --git a/src/core/file_sys/archive_ncch.cpp b/src/core/file_sys/archive_ncch.cpp
index 6d9007731..e8c5be983 100644
--- a/src/core/file_sys/archive_ncch.cpp
+++ b/src/core/file_sys/archive_ncch.cpp
@@ -13,7 +13,10 @@
 #include "core/file_sys/archive_ncch.h"
 #include "core/file_sys/errors.h"
 #include "core/file_sys/ivfc_archive.h"
+#include "core/file_sys/ncch_container.h"
+#include "core/file_sys/title_metadata.h"
 #include "core/hle/service/fs/archive.h"
+#include "core/loader/loader.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FileSys namespace
@@ -25,8 +28,18 @@ static std::string GetNCCHContainerPath(const std::string& nand_directory) {
 }
 
 static std::string GetNCCHPath(const std::string& mount_point, u32 high, u32 low) {
-    return Common::StringFromFormat("%s%08x/%08x/content/00000000.app.romfs", mount_point.c_str(),
-                                    high, low);
+    u32 content_id = 0; // Fallback content ID when no usable TMD is available
+
+    // TODO(shinyquagsire23): Title database should be doing this path lookup
+    std::string content_path =
+        Common::StringFromFormat("%s%08x/%08x/content/", mount_point.c_str(), high, low);
+    std::string tmd_path = content_path + "00000000.tmd";
+    TitleMetadata tmd(tmd_path);
+    if (tmd.Load() == Loader::ResultStatus::Success && tmd.GetContentCount() > 0) {
+        content_id = tmd.GetBootContentID();
+    }
+    // A TMD that lists no content keeps the fallback ID instead of indexing an empty chunk list
+    return Common::StringFromFormat("%s%08x.app", content_path.c_str(), content_id);
 }
 
 ArchiveFactory_NCCH::ArchiveFactory_NCCH(const std::string& nand_directory)
@@ -38,9 +51,14 @@ ResultVal<std::unique_ptr<ArchiveBackend>> ArchiveFactory_NCCH::Open(const Path&
     u32 high = data[1];
     u32 low = data[0];
     std::string file_path = GetNCCHPath(mount_point, high, low);
-    auto file = std::make_shared<FileUtil::IOFile>(file_path, "rb");
 
-    if (!file->IsOpen()) {
+    std::shared_ptr<FileUtil::IOFile> romfs_file;
+    u64 romfs_offset = 0;
+    u64 romfs_size = 0;
+    auto ncch_container = NCCHContainer(file_path);
+
+    if (ncch_container.ReadRomFS(romfs_file, romfs_offset, romfs_size) !=
+        Loader::ResultStatus::Success) {
         // High Title ID of the archive: The category (https://3dbrew.org/wiki/Title_list).
         constexpr u32 shared_data_archive = 0x0004009B;
         constexpr u32 system_data_archive = 0x000400DB;
@@ -74,9 +92,8 @@ ResultVal<std::unique_ptr<ArchiveBackend>> ArchiveFactory_NCCH::Open(const Path&
         }
         return ERROR_NOT_FOUND;
     }
-    auto size = file->GetSize();
 
-    auto archive = std::make_unique<IVFCArchive>(file, 0, size);
+    auto archive = std::make_unique<IVFCArchive>(romfs_file, romfs_offset, romfs_size);
     return MakeResult<std::unique_ptr<ArchiveBackend>>(std::move(archive));
 }
 
diff --git a/src/core/file_sys/ncch_container.cpp b/src/core/file_sys/ncch_container.cpp
index 59c72f3e9..b9fb940c7 100644
--- a/src/core/file_sys/ncch_container.cpp
+++ b/src/core/file_sys/ncch_container.cpp
@@ -116,92 +116,143 @@ Loader::ResultStatus NCCHContainer::Load() {
     if (is_loaded)
         return Loader::ResultStatus::Success;
 
-    // Reset read pointer in case this file has been read before.
-    file.Seek(0, SEEK_SET);
+    if (file.IsOpen()) {
+        // Reset read pointer in case this file has been read before.
+        file.Seek(0, SEEK_SET);
 
-    if (file.ReadBytes(&ncch_header, sizeof(NCCH_Header)) != sizeof(NCCH_Header))
-        return Loader::ResultStatus::Error;
+        if (file.ReadBytes(&ncch_header, sizeof(NCCH_Header)) != sizeof(NCCH_Header))
+            return Loader::ResultStatus::Error;
 
-    // Skip NCSD header and load first NCCH (NCSD is just a container of NCCH files)...
-    if (Loader::MakeMagic('N', 'C', 'S', 'D') == ncch_header.magic) {
-        LOG_DEBUG(Service_FS, "Only loading the first (bootable) NCCH within the NCSD file!");
-        ncch_offset = 0x4000;
-        file.Seek(ncch_offset, SEEK_SET);
-        file.ReadBytes(&ncch_header, sizeof(NCCH_Header));
-    }
+        // Skip NCSD header and load first NCCH (NCSD is just a container of NCCH files)...
+        if (Loader::MakeMagic('N', 'C', 'S', 'D') == ncch_header.magic) {
+            LOG_DEBUG(Service_FS, "Only loading the first (bootable) NCCH within the NCSD file!");
+            ncch_offset = 0x4000;
+            file.Seek(ncch_offset, SEEK_SET);
+            file.ReadBytes(&ncch_header, sizeof(NCCH_Header));
+        }
 
-    // Verify we are loading the correct file type...
-    if (Loader::MakeMagic('N', 'C', 'C', 'H') != ncch_header.magic)
-        return Loader::ResultStatus::ErrorInvalidFormat;
+        // Verify we are loading the correct file type...
+        if (Loader::MakeMagic('N', 'C', 'C', 'H') != ncch_header.magic)
+            return Loader::ResultStatus::ErrorInvalidFormat;
+
+        has_header = true;
+
+        // System archives and DLC don't have an extended header but have RomFS
+        if (ncch_header.extended_header_size) {
+            if (file.ReadBytes(&exheader_header, sizeof(ExHeader_Header)) !=
+                sizeof(ExHeader_Header))
+                return Loader::ResultStatus::Error;
+
+            is_compressed = (exheader_header.codeset_info.flags.flag & 1) == 1;
+            u32 entry_point = exheader_header.codeset_info.text.address;
+            u32 code_size = exheader_header.codeset_info.text.code_size;
+            u32 stack_size = exheader_header.codeset_info.stack_size;
+            u32 bss_size = exheader_header.codeset_info.bss_size;
+            u32 core_version = exheader_header.arm11_system_local_caps.core_version;
+            u8 priority = exheader_header.arm11_system_local_caps.priority;
+            u8 resource_limit_category =
+                exheader_header.arm11_system_local_caps.resource_limit_category;
+
+            LOG_DEBUG(Service_FS, "Name:                        %s",
+                      exheader_header.codeset_info.name);
+            LOG_DEBUG(Service_FS, "Program ID:                  %016" PRIX64,
+                      ncch_header.program_id);
+            LOG_DEBUG(Service_FS, "Code compressed:             %s", is_compressed ? "yes" : "no");
+            LOG_DEBUG(Service_FS, "Entry point:                 0x%08X", entry_point);
+            LOG_DEBUG(Service_FS, "Code size:                   0x%08X", code_size);
+            LOG_DEBUG(Service_FS, "Stack size:                  0x%08X", stack_size);
+            LOG_DEBUG(Service_FS, "Bss size:                    0x%08X", bss_size);
+            LOG_DEBUG(Service_FS, "Core version:                %d", core_version);
+            LOG_DEBUG(Service_FS, "Thread priority:             0x%X", priority);
+            LOG_DEBUG(Service_FS, "Resource limit category:     %d", resource_limit_category);
+            LOG_DEBUG(Service_FS, "System Mode:                 %d",
+                      static_cast<int>(exheader_header.arm11_system_local_caps.system_mode));
+
+            if (exheader_header.system_info.jump_id != ncch_header.program_id) {
+                LOG_ERROR(Service_FS,
+                          "ExHeader Program ID mismatch: the ROM is probably encrypted.");
+                return Loader::ResultStatus::ErrorEncrypted;
+            }
 
-    // System archives and DLC don't have an extended header but have RomFS
-    if (ncch_header.extended_header_size) {
-        if (file.ReadBytes(&exheader_header, sizeof(ExHeader_Header)) != sizeof(ExHeader_Header))
-            return Loader::ResultStatus::Error;
+            has_exheader = true;
+        }
 
-        is_compressed = (exheader_header.codeset_info.flags.flag & 1) == 1;
-        u32 entry_point = exheader_header.codeset_info.text.address;
-        u32 code_size = exheader_header.codeset_info.text.code_size;
-        u32 stack_size = exheader_header.codeset_info.stack_size;
-        u32 bss_size = exheader_header.codeset_info.bss_size;
-        u32 core_version = exheader_header.arm11_system_local_caps.core_version;
-        u8 priority = exheader_header.arm11_system_local_caps.priority;
-        u8 resource_limit_category =
-            exheader_header.arm11_system_local_caps.resource_limit_category;
-
-        LOG_DEBUG(Service_FS, "Name:                        %s", exheader_header.codeset_info.name);
-        LOG_DEBUG(Service_FS, "Program ID:                  %016" PRIX64, ncch_header.program_id);
-        LOG_DEBUG(Service_FS, "Code compressed:             %s", is_compressed ? "yes" : "no");
-        LOG_DEBUG(Service_FS, "Entry point:                 0x%08X", entry_point);
-        LOG_DEBUG(Service_FS, "Code size:                   0x%08X", code_size);
-        LOG_DEBUG(Service_FS, "Stack size:                  0x%08X", stack_size);
-        LOG_DEBUG(Service_FS, "Bss size:                    0x%08X", bss_size);
-        LOG_DEBUG(Service_FS, "Core version:                %d", core_version);
-        LOG_DEBUG(Service_FS, "Thread priority:             0x%X", priority);
-        LOG_DEBUG(Service_FS, "Resource limit category:     %d", resource_limit_category);
-        LOG_DEBUG(Service_FS, "System Mode:                 %d",
-                  static_cast<int>(exheader_header.arm11_system_local_caps.system_mode));
-
-        if (exheader_header.system_info.jump_id != ncch_header.program_id) {
-            LOG_ERROR(Service_FS, "ExHeader Program ID mismatch: the ROM is probably encrypted.");
-            return Loader::ResultStatus::ErrorEncrypted;
+        // DLC can have an ExeFS and a RomFS but no extended header
+        if (ncch_header.exefs_size) {
+            exefs_offset = ncch_header.exefs_offset * kBlockSize;
+            u32 exefs_size = ncch_header.exefs_size * kBlockSize;
+
+            LOG_DEBUG(Service_FS, "ExeFS offset:                0x%08X", exefs_offset);
+            LOG_DEBUG(Service_FS, "ExeFS size:                  0x%08X", exefs_size);
+
+            file.Seek(exefs_offset + ncch_offset, SEEK_SET);
+            if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header))
+                return Loader::ResultStatus::Error;
+
+            exefs_file = FileUtil::IOFile(filepath, "rb");
+            has_exefs = true;
         }
 
-        has_exheader = true;
+        if (ncch_header.romfs_offset != 0 && ncch_header.romfs_size != 0)
+            has_romfs = true;
     }
 
-    // DLC can have an ExeFS and a RomFS but no extended header
-    if (ncch_header.exefs_size) {
-        exefs_offset = ncch_header.exefs_offset * kBlockSize;
-        u32 exefs_size = ncch_header.exefs_size * kBlockSize;
+    LoadOverrides();
 
-        LOG_DEBUG(Service_FS, "ExeFS offset:                0x%08X", exefs_offset);
-        LOG_DEBUG(Service_FS, "ExeFS size:                  0x%08X", exefs_size);
+    // We need at least one of these or overrides, practically
+    if (!(has_exefs || has_romfs || is_tainted))
+        return Loader::ResultStatus::Error;
 
-        file.Seek(exefs_offset + ncch_offset, SEEK_SET);
-        if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header))
-            return Loader::ResultStatus::Error;
+    is_loaded = true;
+    return Loader::ResultStatus::Success;
+}
 
-        has_exefs = true;
+Loader::ResultStatus NCCHContainer::LoadOverrides() {
+    // Check for split-off files, mark the archive as tainted if we will use them
+    std::string romfs_override = filepath + ".romfs";
+    if (FileUtil::Exists(romfs_override)) {
+        is_tainted = true;
     }
 
-    if (ncch_header.romfs_offset != 0 && ncch_header.romfs_size != 0)
-        has_romfs = true;
+    // If we have a split-off exefs file/folder, it takes priority
+    std::string exefs_override = filepath + ".exefs";
+    std::string exefsdir_override = filepath + ".exefsdir/";
+    if (FileUtil::Exists(exefs_override)) {
+        exefs_file = FileUtil::IOFile(exefs_override, "rb");
+
+        if (exefs_file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) == sizeof(ExeFs_Header)) {
+            LOG_DEBUG(Service_FS, "Loading ExeFS section from %s", exefs_override.c_str());
+            exefs_offset = 0;
+            is_tainted = true;
+            has_exefs = true;
+        } else {
+            exefs_file = FileUtil::IOFile(filepath, "rb");
+        }
+    } else if (FileUtil::Exists(exefsdir_override) && FileUtil::IsDirectory(exefsdir_override)) {
+        is_tainted = true;
+    }
+
+    if (is_tainted)
+        LOG_WARNING(Service_FS,
+                    "Loaded NCCH %s is tainted, application behavior may not be as expected!",
+                    filepath.c_str());
 
-    is_loaded = true;
     return Loader::ResultStatus::Success;
 }
 
 Loader::ResultStatus NCCHContainer::LoadSectionExeFS(const char* name, std::vector<u8>& buffer) {
-    if (!file.IsOpen())
-        return Loader::ResultStatus::Error;
-
     Loader::ResultStatus result = Load();
     if (result != Loader::ResultStatus::Success)
         return result;
 
-    if (!has_exefs)
-        return Loader::ResultStatus::ErrorNotUsed;
+    // Check if we have files that can drop-in and replace
+    result = LoadOverrideExeFSSection(name, buffer);
+    if (result == Loader::ResultStatus::Success || !has_exefs)
+        return result;
+
+    // If we don't have any separate files, we'll need a full ExeFS
+    if (!exefs_file.IsOpen())
+        return Loader::ResultStatus::Error;
 
     LOG_DEBUG(Service_FS, "%d sections:", kMaxSections);
     // Iterate through the ExeFs archive until we find a section with the specified name...
@@ -215,7 +266,7 @@ Loader::ResultStatus NCCHContainer::LoadSectionExeFS(const char* name, std::vect
 
             s64 section_offset =
                 (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset);
-            file.Seek(section_offset, SEEK_SET);
+            exefs_file.Seek(section_offset, SEEK_SET);
 
             if (strcmp(section.name, ".code") == 0 && is_compressed) {
                 // Section is compressed, read compressed .code section...
@@ -226,7 +277,7 @@ Loader::ResultStatus NCCHContainer::LoadSectionExeFS(const char* name, std::vect
                     return Loader::ResultStatus::ErrorMemoryAllocationFailed;
                 }
 
-                if (file.ReadBytes(&temp_buffer[0], section.size) != section.size)
+                if (exefs_file.ReadBytes(&temp_buffer[0], section.size) != section.size)
                     return Loader::ResultStatus::Error;
 
                 // Decompress .code section...
@@ -237,7 +288,7 @@ Loader::ResultStatus NCCHContainer::LoadSectionExeFS(const char* name, std::vect
             } else {
                 // Section is uncompressed...
                 buffer.resize(section.size);
-                if (file.ReadBytes(&buffer[0], section.size) != section.size)
+                if (exefs_file.ReadBytes(&buffer[0], section.size) != section.size)
                     return Loader::ResultStatus::Error;
             }
             return Loader::ResultStatus::Success;
@@ -246,20 +297,56 @@ Loader::ResultStatus NCCHContainer::LoadSectionExeFS(const char* name, std::vect
     return Loader::ResultStatus::ErrorNotUsed;
 }
 
-Loader::ResultStatus NCCHContainer::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file,
-                                              u64& offset, u64& size) {
-    if (!file.IsOpen())
+Loader::ResultStatus NCCHContainer::LoadOverrideExeFSSection(const char* name,
+                                                             std::vector<u8>& buffer) {
+    // Reads section `name` from "<filepath>.exefsdir/<file>" into buffer when that file exists
+    std::string override_name;
+    // Map our section name to the extracted equivalent
+    if (!strcmp(name, ".code"))
+        override_name = "code.bin";
+    else if (!strcmp(name, "icon"))
+        override_name = "icon.icn"; // The icon has its own file; it must not alias code.bin
+    else if (!strcmp(name, "banner"))
+        override_name = "banner.bnr";
+    else if (!strcmp(name, "logo"))
+        override_name = "logo.bcma.lz";
+    else
         return Loader::ResultStatus::Error;
 
+    std::string section_override = filepath + ".exefsdir/" + override_name;
+    FileUtil::IOFile section_file(section_override, "rb");
+
+    if (section_file.IsOpen()) {
+        auto section_size = section_file.GetSize();
+        buffer.resize(section_size);
+
+        section_file.Seek(0, SEEK_SET);
+        if (section_file.ReadBytes(buffer.data(), section_size) == section_size) {
+            LOG_WARNING(Service_FS, "File %s overriding built-in ExeFS file",
+                        section_override.c_str());
+            return Loader::ResultStatus::Success;
+        }
+    }
+    return Loader::ResultStatus::ErrorNotUsed;
+}
+
+Loader::ResultStatus NCCHContainer::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file,
+                                              u64& offset, u64& size) {
     Loader::ResultStatus result = Load();
     if (result != Loader::ResultStatus::Success)
         return result;
 
+    if (ReadOverrideRomFS(romfs_file, offset, size) == Loader::ResultStatus::Success)
+        return Loader::ResultStatus::Success;
+
     if (!has_romfs) {
         LOG_DEBUG(Service_FS, "RomFS requested from NCCH which has no RomFS");
         return Loader::ResultStatus::ErrorNotUsed;
     }
 
+    if (!file.IsOpen())
+        return Loader::ResultStatus::Error;
+
     u32 romfs_offset = ncch_offset + (ncch_header.romfs_offset * kBlockSize) + 0x1000;
     u32 romfs_size = (ncch_header.romfs_size * kBlockSize) - 0x1000;
 
@@ -280,11 +367,31 @@ Loader::ResultStatus NCCHContainer::ReadRomFS(std::shared_ptr<FileUtil::IOFile>&
     return Loader::ResultStatus::Success;
 }
 
+Loader::ResultStatus NCCHContainer::ReadOverrideRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file,
+                                                      u64& offset, u64& size) {
+    // A "<filepath>.romfs" file, when it exists and opens, is used in place of the built-in RomFS
+    std::string override_path = filepath + ".romfs";
+    if (!FileUtil::Exists(override_path))
+        return Loader::ResultStatus::ErrorNotUsed;
+
+    romfs_file = std::make_shared<FileUtil::IOFile>(override_path, "rb");
+    if (!romfs_file->IsOpen())
+        return Loader::ResultStatus::ErrorNotUsed;
+
+    LOG_WARNING(Service_FS, "File %s overriding built-in RomFS", override_path.c_str());
+    offset = 0;
+    size = romfs_file->GetSize();
+    return Loader::ResultStatus::Success;
+}
+
 Loader::ResultStatus NCCHContainer::ReadProgramId(u64_le& program_id) {
     Loader::ResultStatus result = Load();
     if (result != Loader::ResultStatus::Success)
         return result;
 
+    if (!has_header)
+        return Loader::ResultStatus::ErrorNotUsed;
+
     program_id = ncch_header.program_id;
     return Loader::ResultStatus::Success;
 }
diff --git a/src/core/file_sys/ncch_container.h b/src/core/file_sys/ncch_container.h
index 8af9032b4..2cc9d13dc 100644
--- a/src/core/file_sys/ncch_container.h
+++ b/src/core/file_sys/ncch_container.h
@@ -180,6 +180,13 @@ public:
     Loader::ResultStatus Load();
 
     /**
+     * Attempt to find overridden sections for the NCCH and mark the container as tainted
+     * if any are found.
+     * @return ResultStatus result of function
+     */
+    Loader::ResultStatus LoadOverrides();
+
+    /**
      * Reads an application ExeFS section of an NCCH file (e.g. .code, .logo, etc.)
      * @param name Name of section to read out of NCCH file
      * @param buffer Vector to read data into
@@ -188,6 +195,15 @@ public:
     Loader::ResultStatus LoadSectionExeFS(const char* name, std::vector<u8>& buffer);
 
     /**
+     * Reads an application ExeFS section from external files instead of an NCCH file,
+     * (e.g. code.bin, logo.bcma.lz, icon.icn, banner.bnr)
+     * @param name Name of section to read from external files
+     * @param buffer Vector to read data into
+     * @return ResultStatus result of function
+     */
+    Loader::ResultStatus LoadOverrideExeFSSection(const char* name, std::vector<u8>& buffer);
+
+    /**
      * Get the RomFS of the NCCH container
      * Since the RomFS can be huge, we return a file reference instead of copying to a buffer
      * @param romfs_file The file containing the RomFS
@@ -199,6 +215,17 @@ public:
                                    u64& size);
 
     /**
+     * Get the override RomFS of the NCCH container
+     * Since the RomFS can be huge, we return a file reference instead of copying to a buffer
+     * @param romfs_file The file containing the RomFS
+     * @param offset The offset the romfs begins on
+     * @param size The size of the romfs
+     * @return ResultStatus result of function
+     */
+    Loader::ResultStatus ReadOverrideRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file,
+                                           u64& offset, u64& size);
+
+    /**
      * Get the Program ID of the NCCH container
      * @return ResultStatus result of function
      */
@@ -227,10 +254,12 @@ public:
     ExHeader_Header exheader_header;
 
 private:
+    bool has_header = false;
     bool has_exheader = false;
     bool has_exefs = false;
     bool has_romfs = false;
 
+    bool is_tainted = false; // Are there parts of this container being overridden?
     bool is_loaded = false;
     bool is_compressed = false;
 
@@ -239,6 +268,7 @@ private:
 
     std::string filepath;
     FileUtil::IOFile file;
+    FileUtil::IOFile exefs_file;
 };
 
 } // namespace FileSys
diff --git a/src/core/file_sys/title_metadata.cpp b/src/core/file_sys/title_metadata.cpp
new file mode 100644
index 000000000..1ef8840a0
--- /dev/null
+++ b/src/core/file_sys/title_metadata.cpp
@@ -0,0 +1,212 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include <cryptopp/sha.h>
+#include "common/alignment.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "core/file_sys/title_metadata.h"
+#include "core/loader/loader.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileSys namespace
+
+namespace FileSys {
+
+// Returns the signature length in bytes for a TMD signature type, or 0 if the type is unknown
+static u32 GetSignatureSize(u32 signature_type) {
+    switch (signature_type) {
+    case Rsa4096Sha1:
+    case Rsa4096Sha256:
+        return 0x200;
+    case Rsa2048Sha1:
+    case Rsa2048Sha256:
+        return 0x100;
+    case EllipticSha1:
+    case EcdsaSha256:
+        return 0x3C;
+    }
+    return 0; // Unknown type: never fall off the end of a non-void function
+}
+
+Loader::ResultStatus TitleMetadata::Load() {
+    FileUtil::IOFile file(filepath, "rb");
+    if (!file.IsOpen())
+        return Loader::ResultStatus::Error;
+
+    // Require the whole field: a short but non-zero read means the file is truncated
+    if (file.ReadBytes(&signature_type, sizeof(u32_be)) != sizeof(u32_be))
+        return Loader::ResultStatus::Error;
+
+    // Signature lengths are variable, and the body follows the signature
+    u32 signature_size = GetSignatureSize(signature_type);
+    tmd_signature.resize(signature_size);
+    if (signature_size == 0 || // Fail before indexing into an empty signature buffer
+        file.ReadBytes(&tmd_signature[0], signature_size) != signature_size)
+        return Loader::ResultStatus::Error;
+
+    // The TMD body start position is rounded to the nearest 0x40 after the signature
+    size_t body_start = Common::AlignUp(signature_size + sizeof(u32), 0x40);
+    file.Seek(body_start, SEEK_SET);
+
+    // Read our TMD body, then load the amount of ContentChunks specified
+    if (file.ReadBytes(&tmd_body, sizeof(TitleMetadata::Body)) != sizeof(TitleMetadata::Body))
+        return Loader::ResultStatus::Error;
+    for (u16 i = 0; i < tmd_body.content_count; i++) {
+        ContentChunk chunk;
+        if (file.ReadBytes(&chunk, sizeof(ContentChunk)) == sizeof(ContentChunk)) {
+            tmd_chunks.push_back(chunk);
+        } else {
+            LOG_ERROR(Service_FS, "Malformed TMD %s, failed to load content chunk index %u!",
+                      filepath.c_str(), i);
+            return Loader::ResultStatus::ErrorInvalidFormat;
+        }
+    }
+
+    return Loader::ResultStatus::Success;
+}
+
+Loader::ResultStatus TitleMetadata::Save() {
+    FileUtil::IOFile file(filepath, "wb");
+    if (!file.IsOpen())
+        return Loader::ResultStatus::Error;
+
+    if (file.WriteBytes(&signature_type, sizeof(u32_be)) != sizeof(u32_be))
+        return Loader::ResultStatus::Error;
+
+    // Signature lengths are variable, and the body follows the signature
+    u32 signature_size = GetSignatureSize(signature_type);
+    tmd_signature.resize(signature_size); // Pad or trim so the write below stays in bounds
+    if (signature_size == 0 ||
+        file.WriteBytes(tmd_signature.data(), signature_size) != signature_size)
+        return Loader::ResultStatus::Error;
+
+    // The TMD body start position is rounded to the nearest 0x40 after the signature
+    size_t body_start = Common::AlignUp(signature_size + sizeof(u32), 0x40);
+    file.Seek(body_start, SEEK_SET);
+
+    // Update our TMD body values and hashes
+    tmd_body.content_count = static_cast<u16>(tmd_chunks.size());
+
+    // TODO(shinyquagsire23): Do TMDs with more than one contentinfo exist?
+    // For now we'll just adjust the first index to hold all content chunks
+    // and ensure that no further content info data exists.
+    tmd_body.contentinfo = {};
+    tmd_body.contentinfo[0].index = 0;
+    tmd_body.contentinfo[0].command_count = static_cast<u16>(tmd_chunks.size());
+
+    CryptoPP::SHA256 chunk_hash;
+    for (u16 i = 0; i < tmd_body.content_count; i++) {
+        chunk_hash.Update(reinterpret_cast<u8*>(&tmd_chunks[i]), sizeof(ContentChunk));
+    }
+    chunk_hash.Final(tmd_body.contentinfo[0].hash.data());
+
+    CryptoPP::SHA256 contentinfo_hash; // Separate context for the content info table
+    for (size_t i = 0; i < tmd_body.contentinfo.size(); i++)
+        contentinfo_hash.Update(reinterpret_cast<u8*>(&tmd_body.contentinfo[i]),
+                                sizeof(ContentInfo));
+    contentinfo_hash.Final(tmd_body.contentinfo_hash.data());
+
+    // Write our TMD body, then write each of our ContentChunks
+    if (file.WriteBytes(&tmd_body, sizeof(TitleMetadata::Body)) != sizeof(TitleMetadata::Body))
+        return Loader::ResultStatus::Error;
+    for (u16 i = 0; i < tmd_body.content_count; i++) {
+        ContentChunk chunk = tmd_chunks[i];
+        if (file.WriteBytes(&chunk, sizeof(ContentChunk)) != sizeof(ContentChunk))
+            return Loader::ResultStatus::Error;
+    }
+
+    return Loader::ResultStatus::Success;
+}
+
+u64 TitleMetadata::GetTitleID() const {
+    return tmd_body.title_id;
+}
+
+u32 TitleMetadata::GetTitleType() const {
+    return tmd_body.title_type;
+}
+
+u16 TitleMetadata::GetTitleVersion() const {
+    return tmd_body.title_version;
+}
+
+u64 TitleMetadata::GetSystemVersion() const {
+    return tmd_body.system_version;
+}
+
+size_t TitleMetadata::GetContentCount() const {
+    return tmd_chunks.size();
+}
+
+u32 TitleMetadata::GetBootContentID() const {
+    return tmd_chunks[TMDContentIndex::Main].id; // NOTE(review): unchecked index into tmd_chunks
+}
+
+u32 TitleMetadata::GetManualContentID() const {
+    return tmd_chunks[TMDContentIndex::Manual].id; // NOTE(review): unchecked index into tmd_chunks
+}
+
+u32 TitleMetadata::GetDLPContentID() const {
+    return tmd_chunks[TMDContentIndex::DLP].id; // NOTE(review): unchecked index into tmd_chunks
+}
+
+void TitleMetadata::SetTitleID(u64 title_id) {
+    tmd_body.title_id = title_id;
+}
+
+void TitleMetadata::SetTitleType(u32 type) {
+    tmd_body.title_type = type;
+}
+
+void TitleMetadata::SetTitleVersion(u16 version) {
+    tmd_body.title_version = version;
+}
+
+void TitleMetadata::SetSystemVersion(u64 version) {
+    tmd_body.system_version = version;
+}
+
+void TitleMetadata::AddContentChunk(const ContentChunk& chunk) {
+    tmd_chunks.push_back(chunk);
+}
+
+void TitleMetadata::Print() const {
+    // Debug dump: the chunk count, each content info range, then the chunks in each range
+    LOG_DEBUG(Service_FS, "%s - %u chunks", filepath.c_str(),
+              static_cast<u32>(tmd_body.content_count));
+    // Content info describes ranges of content chunks
+    LOG_DEBUG(Service_FS, "Content info:");
+    for (size_t i = 0; i < tmd_body.contentinfo.size(); i++) {
+        if (tmd_body.contentinfo[i].command_count == 0)
+            break;
+
+        LOG_DEBUG(Service_FS, "    Index %04X, Command Count %04X",
+                  static_cast<u32>(tmd_body.contentinfo[i].index),
+                  static_cast<u32>(tmd_body.contentinfo[i].command_count));
+    }
+
+    // For each content info, print their content chunk range
+    for (size_t i = 0; i < tmd_body.contentinfo.size(); i++) {
+        u16 index = static_cast<u16>(tmd_body.contentinfo[i].index);
+        u16 count = static_cast<u16>(tmd_body.contentinfo[i].command_count);
+
+        if (count == 0)
+            continue;
+
+        LOG_DEBUG(Service_FS, "Content chunks for content info index %zu:", i);
+        for (u16 j = index; j < index + count; j++) {
+            // Don't attempt to print content we don't have (valid indices end at size() - 1)
+            if (j >= tmd_chunks.size())
+                break;
+
+            const ContentChunk& chunk = tmd_chunks[j];
+            LOG_DEBUG(Service_FS, "    ID %08X, Index %04X, Type %04x, Size %016" PRIX64,
+                      static_cast<u32>(chunk.id), static_cast<u32>(chunk.index),
+                      static_cast<u32>(chunk.type), static_cast<u64>(chunk.size));
+        }
+    }
+}
+} // namespace FileSys
diff --git a/src/core/file_sys/title_metadata.h b/src/core/file_sys/title_metadata.h
new file mode 100644
index 000000000..1fc157bf3
--- /dev/null
+++ b/src/core/file_sys/title_metadata.h
@@ -0,0 +1,125 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+
+namespace Loader {
+enum class ResultStatus; // forward declaration; returned by TitleMetadata::Load() and Save()
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileSys namespace
+
+namespace FileSys {
+
+enum TMDSignatureType : u32 { // presumably the values of a TMD's signature_type field - confirm
+    Rsa4096Sha1 = 0x10000,
+    Rsa2048Sha1 = 0x10001,
+    EllipticSha1 = 0x10002,
+    Rsa4096Sha256 = 0x10003,
+    Rsa2048Sha256 = 0x10004,
+    EcdsaSha256 = 0x10005
+};
+
+enum TMDContentTypeFlag : u16 { // bit masks; presumably applied to ContentChunk::type - confirm
+    Encrypted = 1 << 1, // 0x0002
+    Disc = 1 << 2,      // 0x0004
+    CFM = 1 << 3,       // 0x0008
+    Optional = 1 << 14, // 0x4000
+    Shared = 1 << 15    // 0x8000
+};
+
+/**
+ * Reads, edits and writes Title Metadata (TMD) files. An instance is bound to one file path:
+ * Load() parses an existing file; otherwise the metadata is filled in through the setters and
+ * AddContentChunk(). The TMD can then be interpreted (getters, Print()), modified and/or saved
+ * with Save(). Multi-byte fields of the on-disk structures below use the *_be swap types.
+ */
+class TitleMetadata {
+public:
+    struct ContentChunk {
+        u32_be id;
+        u16_be index;
+        u16_be type;
+        u64_be size;
+        std::array<u8, 0x20> hash;
+    };
+
+    static_assert(sizeof(ContentChunk) == 0x30, "TMD ContentChunk structure size is wrong");
+
+    struct ContentInfo {
+        u16_be index;
+        u16_be command_count;
+        std::array<u8, 0x20> hash;
+    };
+
+    static_assert(sizeof(ContentInfo) == 0x24, "TMD ContentInfo structure size is wrong");
+
+#pragma pack(push, 1)
+
+    struct Body {
+        std::array<u8, 0x40> issuer;
+        u8 version;
+        u8 ca_crl_version;
+        u8 signer_crl_version;
+        u8 reserved;
+        u64_be system_version;
+        u64_be title_id;
+        u32_be title_type;
+        u16_be group_id;
+        u32_be savedata_size;
+        u32_be srl_private_savedata_size;
+        std::array<u8, 4> reserved_2;
+        u8 srl_flag;
+        std::array<u8, 0x31> reserved_3;
+        u32_be access_rights;
+        u16_be title_version;
+        u16_be content_count;
+        u16_be boot_content;
+        std::array<u8, 2> reserved_4;
+        std::array<u8, 0x20> contentinfo_hash;
+        std::array<ContentInfo, 64> contentinfo;
+    };
+
+    static_assert(sizeof(Body) == 0x9C4, "TMD body structure size is wrong");
+
+#pragma pack(pop)
+    // The path is taken by value (sink parameter) so the caller's string is never moved from.
+    explicit TitleMetadata(std::string path) : filepath(std::move(path)) {}
+    Loader::ResultStatus Load();
+    Loader::ResultStatus Save();
+
+    u64 GetTitleID() const;
+    u32 GetTitleType() const;
+    u16 GetTitleVersion() const;
+    u64 GetSystemVersion() const;
+    size_t GetContentCount() const;
+    u32 GetBootContentID() const;
+    u32 GetManualContentID() const;
+    u32 GetDLPContentID() const;
+
+    void SetTitleID(u64 title_id);
+    void SetTitleType(u32 type);
+    void SetTitleVersion(u16 version);
+    void SetSystemVersion(u64 version);
+    void AddContentChunk(const ContentChunk& chunk);
+
+    void Print() const;
+
+private:
+    enum TMDContentIndex { Main = 0, Manual = 1, DLP = 2 };
+
+    Body tmd_body{}; // value-initialized: no indeterminate bytes when no file was loaded
+    u32_be signature_type{};
+    std::vector<u8> tmd_signature;
+    std::vector<ContentChunk> tmd_chunks;
+
+    std::string filepath;
+};
+
+} // namespace FileSys
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 02d5a7a36..d45daca35 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -55,22 +55,19 @@ SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u
             Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
         }
     } else {
-        // TODO(Subv): What happens if an application tries to create multiple memory blocks
-        // pointing to the same address?
         auto& vm_manager = shared_memory->owner_process->vm_manager;
         // The memory is already available and mapped in the owner process.
-        auto vma = vm_manager.FindVMA(address)->second;
-        // Copy it over to our own storage
-        shared_memory->backing_block = std::make_shared<std::vector<u8>>(
-            vma.backing_block->data() + vma.offset, vma.backing_block->data() + vma.offset + size);
-        shared_memory->backing_block_offset = 0;
-        // Unmap the existing pages
-        vm_manager.UnmapRange(address, size);
-        // Map our own block into the address space
-        vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size,
-                                  MemoryState::Shared);
-        // Reprotect the block with the new permissions
-        vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions));
+        auto vma = vm_manager.FindVMA(address);
+        ASSERT_MSG(vma != vm_manager.vma_map.end(), "Invalid memory address");
+        ASSERT_MSG(vma->second.backing_block, "Backing block doesn't exist for address");
+
+        // The returned VMA might be a bigger one encompassing the desired address.
+        auto vma_offset = address - vma->first;
+        ASSERT_MSG(vma_offset + size <= vma->second.size,
+                   "Shared memory exceeds bounds of mapped block");
+
+        shared_memory->backing_block = vma->second.backing_block;
+        shared_memory->backing_block_offset = vma->second.offset + vma_offset;
     }
 
     shared_memory->base_address = address;
@@ -184,4 +181,4 @@ u8* SharedMemory::GetPointer(u32 offset) {
     return backing_block->data() + backing_block_offset + offset;
 }
 
-} // namespace
+} // namespace Kernel
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 2f7362748..59ea9823d 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -171,7 +171,11 @@ void SendParameter(const MessageParameter& parameter) {
     next_parameter = parameter;
     // Signal the event to let the receiver know that a new parameter is ready to be read
     auto* const slot_data = GetAppletSlotData(static_cast<AppletId>(parameter.destination_id));
-    ASSERT(slot_data);
+    if (slot_data == nullptr) {
+        LOG_DEBUG(Service_APT, "No applet was registered with the id %03X",
+                  parameter.destination_id);
+        return;
+    }
 
     slot_data->parameter_event->Signal();
 }
@@ -505,9 +509,6 @@ void SendParameter(Service::Interface* self) {
     size_t size;
     VAddr buffer = rp.PopStaticBuffer(&size);
 
-    std::shared_ptr<HLE::Applets::Applet> dest_applet =
-        HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id));
-
     LOG_DEBUG(Service_APT,
               "called src_app_id=0x%08X, dst_app_id=0x%08X, signal_type=0x%08X,"
               "buffer_size=0x%08X, handle=0x%08X, size=0x%08zX, in_param_buffer_ptr=0x%08X",
@@ -522,12 +523,6 @@ void SendParameter(Service::Interface* self) {
         return;
     }
 
-    if (dest_applet == nullptr) {
-        LOG_ERROR(Service_APT, "Unknown applet id=0x%08X", dst_app_id);
-        rb.Push<u32>(-1); // TODO(Subv): Find the right error code
-        return;
-    }
-
     MessageParameter param;
     param.destination_id = dst_app_id;
     param.sender_id = src_app_id;
@@ -536,7 +531,14 @@ void SendParameter(Service::Interface* self) {
     param.buffer.resize(buffer_size);
     Memory::ReadBlock(buffer, param.buffer.data(), param.buffer.size());
 
-    rb.Push(dest_applet->ReceiveParameter(param));
+    SendParameter(param);
+
+    // If the applet is running in HLE mode, use the HLE interface to communicate with it.
+    if (auto dest_applet = HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id))) {
+        rb.Push(dest_applet->ReceiveParameter(param));
+    } else {
+        rb.Push(RESULT_SUCCESS);
+    }
 }
 
 void ReceiveParameter(Service::Interface* self) {
@@ -765,7 +767,12 @@ void PrepareToStartLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x18, 1, 0); // 0x180040
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
 
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
+
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+
+    // TODO(Subv): Launch the requested applet application.
+
     auto applet = HLE::Applets::Applet::Get(applet_id);
     if (applet) {
         LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id);
@@ -773,7 +780,6 @@ void PrepareToStartLibraryApplet(Service::Interface* self) {
     } else {
         rb.Push(HLE::Applets::Applet::Create(applet_id));
     }
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 }
 
 void PrepareToStartNewestHomeMenu(Service::Interface* self) {
@@ -794,7 +800,12 @@ void PreloadLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x16, 1, 0); // 0x160040
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
 
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
+
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+
+    // TODO(Subv): Launch the requested applet application.
+
     auto applet = HLE::Applets::Applet::Get(applet_id);
     if (applet) {
         LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id);
@@ -802,34 +813,40 @@ void PreloadLibraryApplet(Service::Interface* self) {
     } else {
         rb.Push(HLE::Applets::Applet::Create(applet_id));
     }
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 }
 
 void StartLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x1E, 2, 4); // 0x1E0084
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
-    std::shared_ptr<HLE::Applets::Applet> applet = HLE::Applets::Applet::Get(applet_id);
-
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
-
-    if (applet == nullptr) {
-        LOG_ERROR(Service_APT, "unknown applet id=%08X", applet_id);
-        IPC::RequestBuilder rb = rp.MakeBuilder(1, 0, false);
-        rb.Push<u32>(-1); // TODO(Subv): Find the right error code
-        return;
-    }
 
     size_t buffer_size = rp.Pop<u32>();
     Kernel::Handle handle = rp.PopHandle();
     VAddr buffer_addr = rp.PopStaticBuffer();
 
-    AppletStartupParameter parameter;
-    parameter.object = Kernel::g_handle_table.GetGeneric(handle);
-    parameter.buffer.resize(buffer_size);
-    Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size());
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
-    rb.Push(applet->Start(parameter));
+
+    // Send the Wakeup signal to the applet
+    MessageParameter param;
+    param.destination_id = static_cast<u32>(applet_id);
+    param.sender_id = static_cast<u32>(AppletId::Application);
+    param.object = Kernel::g_handle_table.GetGeneric(handle);
+    param.signal = static_cast<u32>(SignalType::Wakeup);
+    param.buffer.resize(buffer_size);
+    Memory::ReadBlock(buffer_addr, param.buffer.data(), param.buffer.size());
+    SendParameter(param);
+
+    // In case the applet is being HLEd, attempt to communicate with it.
+    if (auto applet = HLE::Applets::Applet::Get(applet_id)) {
+        AppletStartupParameter parameter;
+        parameter.object = Kernel::g_handle_table.GetGeneric(handle);
+        parameter.buffer.resize(buffer_size);
+        Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size());
+        rb.Push(applet->Start(parameter));
+    } else {
+        rb.Push(RESULT_SUCCESS);
+    }
 }
 
 void CancelLibraryApplet(Service::Interface* self) {
diff --git a/src/core/hle/service/apt/apt_s.cpp b/src/core/hle/service/apt/apt_s.cpp
index fe1d21fff..bb78ee7d7 100644
--- a/src/core/hle/service/apt/apt_s.cpp
+++ b/src/core/hle/service/apt/apt_s.cpp
@@ -20,7 +20,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00090040, IsRegistered, "IsRegistered"},
     {0x000A0040, nullptr, "GetAttribute"},
     {0x000B0040, InquireNotification, "InquireNotification"},
-    {0x000C0104, nullptr, "SendParameter"},
+    {0x000C0104, SendParameter, "SendParameter"},
     {0x000D0080, ReceiveParameter, "ReceiveParameter"},
     {0x000E0080, GlanceParameter, "GlanceParameter"},
     {0x000F0100, nullptr, "CancelParameter"},
@@ -38,7 +38,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x001B00C4, nullptr, "StartApplication"},
     {0x001C0000, nullptr, "WakeupApplication"},
     {0x001D0000, nullptr, "CancelApplication"},
-    {0x001E0084, nullptr, "StartLibraryApplet"},
+    {0x001E0084, StartLibraryApplet, "StartLibraryApplet"},
     {0x001F0084, nullptr, "StartSystemApplet"},
     {0x00200044, nullptr, "StartNewestHomeMenu"},
     {0x00210000, nullptr, "OrderToCloseApplication"},
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index 66bc5823d..52686e364 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -14,6 +14,7 @@
 #include "core/core.h"
 #include "core/file_sys/archive_selfncch.h"
 #include "core/file_sys/ncch_container.h"
+#include "core/file_sys/title_metadata.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/service/cfg/cfg.h"
@@ -49,9 +50,19 @@ static std::string GetUpdateNCCHPath(u64_le program_id) {
     u32 high = static_cast<u32>((program_id | UPDATE_MASK) >> 32);
     u32 low = static_cast<u32>((program_id | UPDATE_MASK) & 0xFFFFFFFF);
 
-    return Common::StringFromFormat("%sNintendo 3DS/%s/%s/title/%08x/%08x/content/00000000.app",
-                                    FileUtil::GetUserPath(D_SDMC_IDX).c_str(), SYSTEM_ID, SDCARD_ID,
-                                    high, low);
+    // TODO(shinyquagsire23): Title database should be doing this path lookup
+    std::string content_path = Common::StringFromFormat(
+        "%sNintendo 3DS/%s/%s/title/%08x/%08x/content/", FileUtil::GetUserPath(D_SDMC_IDX).c_str(),
+        SYSTEM_ID, SDCARD_ID, high, low);
+    std::string tmd_path = content_path + "00000000.tmd";
+
+    u32 content_id = 0;
+    FileSys::TitleMetadata tmd(tmd_path);
+    if (tmd.Load() == ResultStatus::Success) {
+        content_id = tmd.GetBootContentID();
+    }
+
+    return Common::StringFromFormat("%s%08x.app", content_path.c_str(), content_id);
 }
 
 std::pair<boost::optional<u32>, ResultStatus> AppLoader_NCCH::LoadKernelSystemMode() {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3ab4af374..caf9f7a06 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
     }
 }
 
+static void LoadDefaultVertexAttributes(u32 register_value) {
+    auto& regs = g_state.regs;
+
+    // TODO: Does actual hardware indeed keep an intermediate buffer or does
+    //       it directly write the values?
+    default_attr_write_buffer[default_attr_counter++] = register_value;
+
+    // Each call buffers one 32-bit word. Default attributes are written in a packed format such
+    // that four float24 values are encoded in three 32-bit words, so the vector is only decoded
+    // and committed below once the third word has arrived.
+    if (default_attr_counter >= 3) {
+        default_attr_counter = 0; // the next call starts a new packed vector
+
+        auto& setup = regs.pipeline.vs_default_attributes_setup;
+
+        if (setup.index >= 16) {
+            LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
+            return; // the three buffered words are dropped
+        }
+
+        Math::Vec4<float24> attribute;
+
+        // NOTE: The destination component order indeed is "backwards"
+        attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
+        attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
+                                       ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
+        attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
+                                       ((default_attr_write_buffer[2] >> 24) & 0xFF));
+        attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
+
+        LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
+                  attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
+                  attribute.w.ToFloat32());
+
+        // TODO: Verify that this actually modifies the register!
+        if (setup.index < 15) {
+            g_state.input_default_attributes.attr[setup.index] = attribute;
+            setup.index++;
+        } else {
+            // Index 15: store the attribute in the immediate-mode input vertex. Once the
+            // attribute at max_input_attrib_index has been stored, the Vertex Shader is invoked
+            // and its output is submitted to the geometry pipeline, then drawn right away.
+
+            auto& immediate_input = g_state.immediate.input_vertex;
+            auto& immediate_attribute_id = g_state.immediate.current_attribute;
+
+            immediate_input.attr[immediate_attribute_id] = attribute;
+
+            if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
+                immediate_attribute_id += 1;
+            } else {
+                MICROPROFILE_SCOPE(GPU_Drawing);
+                immediate_attribute_id = 0;
+
+                auto* shader_engine = Shader::GetEngine();
+                shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+                // Send to vertex shader
+                if (g_debug_context)
+                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+                                             static_cast<void*>(&immediate_input));
+                Shader::UnitState shader_unit;
+                Shader::AttributeBuffer output{};
+
+                shader_unit.LoadInput(regs.vs, immediate_input);
+                shader_engine->Run(g_state.vs, shader_unit);
+                shader_unit.WriteOutput(regs.vs, output);
+
+                // Send to geometry pipeline
+                if (g_state.immediate.reset_geometry_pipeline) {
+                    g_state.geometry_pipeline.Reconfigure();
+                    g_state.immediate.reset_geometry_pipeline = false;
+                }
+                ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
+                g_state.geometry_pipeline.Setup(shader_engine);
+                g_state.geometry_pipeline.SubmitVertex(output);
+
+                // TODO: If drawing after every immediate mode triangle kills performance,
+                // change it to flush triangles whenever a drawing config register changes
+                // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
+                VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+                if (g_debug_context) {
+                    g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+                }
+            }
+        }
+    }
+}
+
+static void Draw(u32 command_id) {
+    MICROPROFILE_SCOPE(GPU_Drawing);
+    auto& regs = g_state.regs;
+
+#if PICA_LOG_TEV
+    DebugUtils::DumpTevStageConfig(regs.GetTevStages());
+#endif
+    if (g_debug_context)
+        g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+
+    // Processes information about internal vertex attributes to figure out how a vertex is
+    // loaded.
+    // Later, these can be compiled and cached.
+    const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
+    VertexLoader loader(regs.pipeline);
+
+    // The draw is indexed when it was triggered through the trigger_draw_indexed register
+    bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
+
+    const auto& index_info = regs.pipeline.index_array;
+    const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
+    const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
+    bool index_u16 = index_info.format != 0;
+
+    // When a debug recorder is attached, report the memory range backing each of the three
+    // texture units that is enabled.
+    if (g_debug_context && g_debug_context->recorder) {
+        for (int i = 0; i < 3; ++i) {
+            const auto texture = regs.texturing.GetTextures()[i];
+            if (!texture.enabled)
+                continue;
+
+            u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+            g_debug_context->recorder->MemoryAccessed(
+                texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
+                                  texture.config.width / 2 * texture.config.height,
+                texture.config.GetPhysicalAddress());
+        }
+    }
+
+    DebugUtils::MemoryAccessTracker memory_accesses;
+
+    // Simple circular-replacement vertex cache
+    // The size has been tuned for optimal balance between hit-rate and the cost of lookup
+    const size_t VERTEX_CACHE_SIZE = 32;
+    std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
+    std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
+    Shader::AttributeBuffer vs_output;
+
+    unsigned int vertex_cache_pos = 0;
+    vertex_cache_ids.fill(-1);
+
+    auto* shader_engine = Shader::GetEngine();
+    Shader::UnitState shader_unit;
+
+    shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+    g_state.geometry_pipeline.Reconfigure();
+    g_state.geometry_pipeline.Setup(shader_engine);
+    if (g_state.geometry_pipeline.NeedIndexInput())
+        ASSERT(is_indexed);
+
+    for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
+        // Indexed rendering doesn't use the start offset
+        unsigned int vertex = is_indexed
+                                  ? (index_u16 ? index_address_16[index] : index_address_8[index])
+                                  : (index + regs.pipeline.vertex_offset);
+
+        // -1 is a common special value used for primitive restart. Since it's unknown if
+        // the PICA supports it, and it would mess up the caching, guard against it here.
+        ASSERT(vertex != -1);
+
+        bool vertex_cache_hit = false;
+
+        if (is_indexed) {
+            if (g_state.geometry_pipeline.NeedIndexInput()) {
+                g_state.geometry_pipeline.SubmitIndex(vertex);
+                continue;
+            }
+
+            if (g_debug_context && Pica::g_debug_context->recorder) {
+                int size = index_u16 ? 2 : 1;
+                memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
+            }
+
+            for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
+                if (vertex == vertex_cache_ids[i]) {
+                    vs_output = vertex_cache[i];
+                    vertex_cache_hit = true;
+                    break;
+                }
+            }
+        }
+
+        if (!vertex_cache_hit) {
+            // Initialize data for the current vertex
+            Shader::AttributeBuffer input;
+            loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
+
+            // Send to vertex shader
+            if (g_debug_context)
+                g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+                                         static_cast<void*>(&input));
+            shader_unit.LoadInput(regs.vs, input);
+            shader_engine->Run(g_state.vs, shader_unit);
+            shader_unit.WriteOutput(regs.vs, vs_output);
+
+            if (is_indexed) {
+                vertex_cache[vertex_cache_pos] = vs_output;
+                vertex_cache_ids[vertex_cache_pos] = vertex;
+                vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
+            }
+        }
+
+        // Send to geometry pipeline
+        g_state.geometry_pipeline.SubmitVertex(vs_output);
+    }
+
+    for (const auto& range : memory_accesses.ranges) {
+        g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
+                                                  range.second, range.first);
+    }
+
+    VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+    if (g_debug_context) {
+        g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+    }
+}
+
 static void WritePicaReg(u32 id, u32 value, u32 mask) {
     auto& regs = g_state.regs;
 
@@ -168,95 +386,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
     // Load default vertex input attributes
     case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
     case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
-    case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): {
-        // TODO: Does actual hardware indeed keep an intermediate buffer or does
-        //       it directly write the values?
-        default_attr_write_buffer[default_attr_counter++] = value;
-
-        // Default attributes are written in a packed format such that four float24 values are
-        // encoded in
-        // three 32-bit numbers. We write to internal memory once a full such vector is
-        // written.
-        if (default_attr_counter >= 3) {
-            default_attr_counter = 0;
-
-            auto& setup = regs.pipeline.vs_default_attributes_setup;
-
-            if (setup.index >= 16) {
-                LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
-                break;
-            }
-
-            Math::Vec4<float24> attribute;
-
-            // NOTE: The destination component order indeed is "backwards"
-            attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
-            attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
-                                           ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
-            attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
-                                           ((default_attr_write_buffer[2] >> 24) & 0xFF));
-            attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
-
-            LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
-                      attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
-                      attribute.w.ToFloat32());
-
-            // TODO: Verify that this actually modifies the register!
-            if (setup.index < 15) {
-                g_state.input_default_attributes.attr[setup.index] = attribute;
-                setup.index++;
-            } else {
-                // Put each attribute into an immediate input buffer.  When all specified immediate
-                // attributes are present, the Vertex Shader is invoked and everything is sent to
-                // the primitive assembler.
-
-                auto& immediate_input = g_state.immediate.input_vertex;
-                auto& immediate_attribute_id = g_state.immediate.current_attribute;
-
-                immediate_input.attr[immediate_attribute_id] = attribute;
-
-                if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
-                    immediate_attribute_id += 1;
-                } else {
-                    MICROPROFILE_SCOPE(GPU_Drawing);
-                    immediate_attribute_id = 0;
-
-                    auto* shader_engine = Shader::GetEngine();
-                    shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
-                    // Send to vertex shader
-                    if (g_debug_context)
-                        g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
-                                                 static_cast<void*>(&immediate_input));
-                    Shader::UnitState shader_unit;
-                    Shader::AttributeBuffer output{};
-
-                    shader_unit.LoadInput(regs.vs, immediate_input);
-                    shader_engine->Run(g_state.vs, shader_unit);
-                    shader_unit.WriteOutput(regs.vs, output);
-
-                    // Send to geometry pipeline
-                    if (g_state.immediate.reset_geometry_pipeline) {
-                        g_state.geometry_pipeline.Reconfigure();
-                        g_state.immediate.reset_geometry_pipeline = false;
-                    }
-                    ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
-                    g_state.geometry_pipeline.Setup(shader_engine);
-                    g_state.geometry_pipeline.SubmitVertex(output);
-
-                    // TODO: If drawing after every immediate mode triangle kills performance,
-                    // change it to flush triangles whenever a drawing config register changes
-                    // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
-                    VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-                    if (g_debug_context) {
-                        g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch,
-                                                 nullptr);
-                    }
-                }
-            }
-        }
+    case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235):
+        LoadDefaultVertexAttributes(value);
         break;
-    }
 
     case PICA_REG_INDEX(pipeline.gpu_mode):
         // This register likely just enables vertex processing and doesn't need any special handling
@@ -275,136 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 
     // It seems like these trigger vertex rendering
     case PICA_REG_INDEX(pipeline.trigger_draw):
-    case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
-        MICROPROFILE_SCOPE(GPU_Drawing);
-
-#if PICA_LOG_TEV
-        DebugUtils::DumpTevStageConfig(regs.GetTevStages());
-#endif
-        if (g_debug_context)
-            g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-
-        // Processes information about internal vertex attributes to figure out how a vertex is
-        // loaded.
-        // Later, these can be compiled and cached.
-        const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
-        VertexLoader loader(regs.pipeline);
-
-        // Load vertices
-        bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
-
-        const auto& index_info = regs.pipeline.index_array;
-        const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
-        const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
-        bool index_u16 = index_info.format != 0;
-
-        PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
-
-        if (g_debug_context && g_debug_context->recorder) {
-            for (int i = 0; i < 3; ++i) {
-                const auto texture = regs.texturing.GetTextures()[i];
-                if (!texture.enabled)
-                    continue;
-
-                u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
-                g_debug_context->recorder->MemoryAccessed(
-                    texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
-                                      texture.config.width / 2 * texture.config.height,
-                    texture.config.GetPhysicalAddress());
-            }
-        }
-
-        DebugUtils::MemoryAccessTracker memory_accesses;
-
-        // Simple circular-replacement vertex cache
-        // The size has been tuned for optimal balance between hit-rate and the cost of lookup
-        const size_t VERTEX_CACHE_SIZE = 32;
-        std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
-        std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
-        Shader::AttributeBuffer vs_output;
-
-        unsigned int vertex_cache_pos = 0;
-        vertex_cache_ids.fill(-1);
-
-        auto* shader_engine = Shader::GetEngine();
-        Shader::UnitState shader_unit;
-
-        shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
-        g_state.geometry_pipeline.Reconfigure();
-        g_state.geometry_pipeline.Setup(shader_engine);
-        if (g_state.geometry_pipeline.NeedIndexInput())
-            ASSERT(is_indexed);
-
-        for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
-            // Indexed rendering doesn't use the start offset
-            unsigned int vertex =
-                is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
-                           : (index + regs.pipeline.vertex_offset);
-
-            // -1 is a common special value used for primitive restart. Since it's unknown if
-            // the PICA supports it, and it would mess up the caching, guard against it here.
-            ASSERT(vertex != -1);
-
-            bool vertex_cache_hit = false;
-
-            if (is_indexed) {
-                if (g_state.geometry_pipeline.NeedIndexInput()) {
-                    g_state.geometry_pipeline.SubmitIndex(vertex);
-                    continue;
-                }
-
-                if (g_debug_context && Pica::g_debug_context->recorder) {
-                    int size = index_u16 ? 2 : 1;
-                    memory_accesses.AddAccess(base_address + index_info.offset + size * index,
-                                              size);
-                }
-
-                for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
-                    if (vertex == vertex_cache_ids[i]) {
-                        vs_output = vertex_cache[i];
-                        vertex_cache_hit = true;
-                        break;
-                    }
-                }
-            }
-
-            if (!vertex_cache_hit) {
-                // Initialize data for the current vertex
-                Shader::AttributeBuffer input;
-                loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
-
-                // Send to vertex shader
-                if (g_debug_context)
-                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
-                                             (void*)&input);
-                shader_unit.LoadInput(regs.vs, input);
-                shader_engine->Run(g_state.vs, shader_unit);
-                shader_unit.WriteOutput(regs.vs, vs_output);
-
-                if (is_indexed) {
-                    vertex_cache[vertex_cache_pos] = vs_output;
-                    vertex_cache_ids[vertex_cache_pos] = vertex;
-                    vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
-                }
-            }
-
-            // Send to geometry pipeline
-            g_state.geometry_pipeline.SubmitVertex(vs_output);
-        }
-
-        for (auto& range : memory_accesses.ranges) {
-            g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
-                                                      range.second, range.first);
-        }
-
-        VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-        if (g_debug_context) {
-            g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-        }
-
+    case PICA_REG_INDEX(pipeline.trigger_draw_indexed):
+        Draw(id);
         break;
-    }
 
     case PICA_REG_INDEX(gs.bool_uniforms):
         WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value());