diff options
220 files changed, 4309 insertions, 2103 deletions
diff --git a/.travis.yml b/.travis.yml index 9512f7843..93fda1dfa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ matrix: - os: osx env: NAME="macos build" sudo: false - osx_image: xcode10.1 + osx_image: xcode10.2 install: "./.travis/macos/deps.sh" script: "./.travis/macos/build.sh" after_success: "./.travis/macos/upload.sh" diff --git a/.travis/linux-mingw/build.sh b/.travis/linux-mingw/build.sh index be03cc0f3..b12d70b12 100755 --- a/.travis/linux-mingw/build.sh +++ b/.travis/linux-mingw/build.sh @@ -1,3 +1,3 @@ #!/bin/bash -ex mkdir "$HOME/.ccache" || true -docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh +docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh diff --git a/.travis/linux-mingw/deps.sh b/.travis/linux-mingw/deps.sh index 540bb934a..55b5d6006 100755 --- a/.travis/linux-mingw/deps.sh +++ b/.travis/linux-mingw/deps.sh @@ -1,3 +1,3 @@ #!/bin/sh -ex -docker pull ubuntu:18.04 +docker pull yuzuemu/build-environments:linux-mingw diff --git a/.travis/linux-mingw/docker.sh b/.travis/linux-mingw/docker.sh index 6cf43a006..28033acfb 100755 --- a/.travis/linux-mingw/docker.sh +++ b/.travis/linux-mingw/docker.sh @@ -1,16 +1,6 @@ #!/bin/bash -ex cd /yuzu -MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64" -apt-get update -apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake -echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list -apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2' -apt-get update -apt-get install -y ${MINGW_PACKAGES} - -# fix a problem in current MinGW headers -wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h # override Travis CI unreasonable ccache size echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf" @@ -23,8 +13,8 @@ echo '' >> /bin/cmd chmod +x /bin/cmd mkdir build && cd build -cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release -make -j4 +cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release +ninja # Clean up the dirty hacks rm /bin/uname && mv /bin/uname1 /bin/uname diff --git a/.travis/linux/build.sh b/.travis/linux/build.sh index 2fced727d..3929f97fc 100755 --- a/.travis/linux/build.sh +++ b/.travis/linux/build.sh @@ -1,4 +1,4 @@ #!/bin/bash -ex mkdir -p "$HOME/.ccache" -docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh +docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.travis/linux/docker.sh diff --git a/.travis/linux/deps.sh b/.travis/linux/deps.sh index 540bb934a..8d23c517d 100755 --- a/.travis/linux/deps.sh +++ b/.travis/linux/deps.sh @@ -1,3 +1,3 @@ #!/bin/sh -ex -docker pull ubuntu:18.04 +docker pull yuzuemu/build-environments:linux-fresh diff --git a/.travis/linux/docker.sh b/.travis/linux/docker.sh index 8b7e65911..3a9970384 100755 --- a/.travis/linux/docker.sh +++ b/.travis/linux/docker.sh @@ -1,12 +1,9 @@ #!/bin/bash -ex -apt-get update -apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache - cd /yuzu mkdir build && cd build -cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja +cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON ninja ccache -s diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh index b7b4c6f8c..0abd1a93a 100755 --- a/.travis/macos/build.sh +++ b/.travis/macos/build.sh @@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5 export UNICORNDIR=$(pwd)/externals/unicorn export PATH="/usr/local/opt/ccache/libexec:$PATH" +# TODO: Build using ninja instead of make mkdir build && cd build cmake --version cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON diff --git a/.travis/macos/upload.sh b/.travis/macos/upload.sh index 66e3455ff..c2f43a906 100755 --- a/.travis/macos/upload.sh +++ b/.travis/macos/upload.sh @@ -19,18 +19,6 @@ $(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app" -executable="${R # move libs into folder for deployment macpack "${REV_NAME}/yuzu-cmd" -d "libs" -# Make the yuzu.app application launch a debugging terminal. -# Store away the actual binary -mv ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu-bin - -cat > ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu <<EOL -#!/usr/bin/env bash -cd "\`dirname "\$0"\`" -chmod +x yuzu-bin -open yuzu-bin --args "\$@" -EOL -# Content that will serve as the launching script for yuzu (within the .app folder) - # Make the launching script executable chmod +x ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a417017c..bfa104034 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,7 +132,7 @@ find_package(Threads REQUIRED) if (ENABLE_SDL2) if (YUZU_USE_BUNDLED_SDL2) # Detect toolchain and platform - if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64) + if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64) set(SDL2_VER "SDL2-2.0.8") else() message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.") @@ -165,7 +165,7 @@ if (YUZU_USE_BUNDLED_UNICORN) if (MSVC) message(STATUS "unicorn not found, falling back to bundled") # Detect toolchain and platform - if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64) + if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64) set(UNICORN_VER "unicorn-yuzu") else() message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.") @@ -233,7 +233,7 @@ endif() if (ENABLE_QT) if (YUZU_USE_BUNDLED_QT) - if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64) + if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64) set(QT_VER qt-5.12.0-msvc2017_64) else() message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.") diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 08315a1f1..5e00d839f 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -19,7 +19,7 @@ set(BUILD_VERSION "0") if (BUILD_REPOSITORY) # regex capture the string nightly or canary into CMAKE_MATCH_1 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY}) - if (${CMAKE_MATCH_COUNT} GREATER 0) + if ("${CMAKE_MATCH_COUNT}" GREATER 0) # capitalize the first letter of each word in the repo name. string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1}) foreach(WORD ${REPO_NAME_LIST}) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 3f8b6cda8..e6fa11a03 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -7,6 +7,10 @@ include(DownloadExternals) add_library(catch-single-include INTERFACE) target_include_directories(catch-single-include INTERFACE catch/single_include) +# libfmt +add_subdirectory(fmt) +add_library(fmt::fmt ALIAS fmt) + # Dynarmic if (ARCHITECTURE_x86_64) set(DYNARMIC_TESTS OFF) @@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64) add_subdirectory(dynarmic) endif() -# libfmt -add_subdirectory(fmt) -add_library(fmt::fmt ALIAS fmt) - # getopt if (MSVC) add_subdirectory(getopt) diff --git a/externals/dynarmic b/externals/dynarmic -Subproject 4e6848d1c9e8dadc70595c15b5589f8b14aad47 +Subproject 2683a9a3e316b5c3f387bbe6787732b9ff44b8d diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6c99dd5e2..04018233f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,15 +18,32 @@ if (MSVC) # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors. add_definitions(-DWIN32_LEAN_AND_MEAN) - # /W3 - Level 3 warnings - # /MP - Multi-threaded compilation - # /Zi - Output debugging information - # /Zo - enhanced debug info for optimized builds - # /permissive- - enables stricter C++ standards conformance checks - # /EHsc - C++-only exception handling semantics - # /Zc:throwingNew - let codegen assume `operator new` will never return null - # /Zc:inline - let codegen omit inline functions in object files - add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline) + # Ensure that projects build with Unicode support. + add_definitions(-DUNICODE -D_UNICODE) + + # /W3 - Level 3 warnings + # /MP - Multi-threaded compilation + # /Zi - Output debugging information + # /Zo - Enhanced debug info for optimized builds + # /permissive- - Enables stricter C++ standards conformance checks + # /EHsc - C++-only exception handling semantics + # /volatile:iso - Use strict standards-compliant volatile semantics. + # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates + # /Zc:inline - Let codegen omit inline functions in object files + # /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null + add_compile_options( + /W3 + /MP + /Zi + /Zo + /permissive- + /EHsc + /std:c++latest + /volatile:iso + /Zc:externConstexpr + /Zc:inline + /Zc:throwingNew + ) # /GS- - No stack buffer overflow checks add_compile_options("$<$<CONFIG:Release>:/GS->") @@ -34,7 +51,10 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) else() - add_compile_options("-Wno-attributes") + add_compile_options( + -Wall + -Wno-attributes + ) if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang) add_compile_options("-stdlib=libc++") diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h index fe2231a6c..4c16f6e03 100644 --- a/src/common/lz4_compression.h +++ b/src/common/lz4_compression.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <vector> #include "common/common_types.h" diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp index 60a35c67c..978526492 100644 --- a/src/common/zstd_compression.cpp +++ b/src/common/zstd_compression.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <algorithm> #include <zstd.h> diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h index e0a64b035..e9de941c8 100644 --- a/src/common/zstd_compression.h +++ b/src/common/zstd_compression.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <vector> #include "common/common_types.h" diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index c59107102..2ace866ee 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -88,6 +88,10 @@ add_library(core STATIC file_sys/vfs_vector.h file_sys/xts_archive.cpp file_sys/xts_archive.h + frontend/applets/error.cpp + frontend/applets/error.h + frontend/applets/general_frontend.cpp + frontend/applets/general_frontend.h frontend/applets/profile_select.cpp frontend/applets/profile_select.h frontend/applets/software_keyboard.cpp @@ -177,12 +181,14 @@ add_library(core STATIC hle/service/am/applet_oe.h hle/service/am/applets/applets.cpp hle/service/am/applets/applets.h + hle/service/am/applets/error.cpp + hle/service/am/applets/error.h + hle/service/am/applets/general_backend.cpp + hle/service/am/applets/general_backend.h hle/service/am/applets/profile_select.cpp hle/service/am/applets/profile_select.h hle/service/am/applets/software_keyboard.cpp hle/service/am/applets/software_keyboard.h - hle/service/am/applets/stub_applet.cpp - hle/service/am/applets/stub_applet.h hle/service/am/applets/web_browser.cpp hle/service/am/applets/web_browser.h hle/service/am/idle.cpp diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 4dfd41b43..978b1518f 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -7,6 +7,10 @@ #include <array> #include "common/common_types.h" +namespace Common { +struct PageTable; +} + namespace Kernel { enum class VMAPermission : u8; } @@ -49,8 +53,14 @@ public: /// Clear all instruction cache virtual void ClearInstructionCache() = 0; - /// Notify CPU emulation that page tables have changed - virtual void PageTableChanged() = 0; + /// Notifies CPU emulation that the current page table has changed. + /// + /// @param new_page_table The new page table. + /// @param new_address_space_size_in_bits The new usable size of the address space in bits. + /// This can be either 32, 36, or 39 on official software. + /// + virtual void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) = 0; /** * Set the Program Counter to an address diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index dc96e35d5..44307fa19 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -14,7 +14,6 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/vm_manager.h" @@ -129,18 +128,16 @@ public: u64 tpidr_el0 = 0; }; -std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const { - auto* current_process = system.Kernel().CurrentProcess(); - auto** const page_table = current_process->VMManager().page_table.pointers.data(); - +std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const { Dynarmic::A64::UserConfig config; // Callbacks config.callbacks = cb.get(); // Memory - config.page_table = reinterpret_cast<void**>(page_table); - config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth(); + config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); + config.page_table_address_space_bits = address_space_bits; config.silently_mirror_page_table = false; // Multi-process state @@ -176,12 +173,7 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index) : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, core_index{core_index}, system{system}, - exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { - ThreadContext ctx{}; - inner_unicorn.SaveContext(ctx); - PageTableChanged(); - LoadContext(ctx); -} + exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} ARM_Dynarmic::~ARM_Dynarmic() = default; @@ -276,8 +268,9 @@ void ARM_Dynarmic::ClearExclusiveState() { jit->ClearExclusiveState(); } -void ARM_Dynarmic::PageTableChanged() { - jit = MakeJit(); +void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + jit = MakeJit(page_table, new_address_space_size_in_bits); } DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index c1db254e8..b701e97a3 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -48,10 +48,12 @@ public: void ClearExclusiveState() override; void ClearInstructionCache() override; - void PageTableChanged() override; + void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) override; private: - std::unique_ptr<Dynarmic::A64::Jit> MakeJit() const; + std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const; friend class ARM_Dynarmic_Callbacks; std::unique_ptr<ARM_Dynarmic_Callbacks> cb; diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 209fc16ad..34e974b4d 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -41,7 +41,7 @@ public: void Run() override; void Step() override; void ClearInstructionCache() override; - void PageTableChanged() override{}; + void PageTableChanged(Common::PageTable&, std::size_t) override {} void RecordBreak(GDBStub::BreakpointAddress bkpt); private: diff --git a/src/core/core.cpp b/src/core/core.cpp index bc9e887b6..7106151bd 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. #include <array> -#include <map> #include <memory> -#include <thread> #include <utility> #include "common/file_util.h" @@ -20,13 +18,18 @@ #include "core/file_sys/registered_cache.h" #include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_real.h" +#include "core/frontend/applets/error.h" +#include "core/frontend/applets/general_frontend.h" +#include "core/frontend/applets/profile_select.h" +#include "core/frontend/applets/software_keyboard.h" +#include "core/frontend/applets/web_browser.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" -#include "core/hle/service/am/applets/software_keyboard.h" +#include "core/hle/service/am/applets/applets.h" #include "core/hle/service/service.h" #include "core/hle/service/sm/sm.h" #include "core/loader/loader.h" @@ -38,8 +41,6 @@ #include "frontend/applets/software_keyboard.h" #include "frontend/applets/web_browser.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/gpu_asynch.h" -#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -81,7 +82,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, return vfs->OpenFile(path, FileSys::Mode::Read); } struct System::Impl { - explicit Impl(System& system) : kernel{system} {} + explicit Impl(System& system) : kernel{system}, cpu_core_manager{system} {} Cpu& CurrentCpuCore() { return cpu_core_manager.GetCurrentCore(); @@ -99,6 +100,7 @@ struct System::Impl { LOG_DEBUG(HW_Memory, "initialized OK"); core_timing.Initialize(); + cpu_core_manager.Initialize(); kernel.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( @@ -113,15 +115,7 @@ struct System::Impl { content_provider = std::make_unique<FileSys::ContentProviderUnion>(); /// Create default implementations of applets if one is not provided. - if (profile_selector == nullptr) - profile_selector = std::make_unique<Core::Frontend::DefaultProfileSelectApplet>(); - if (software_keyboard == nullptr) - software_keyboard = std::make_unique<Core::Frontend::DefaultSoftwareKeyboardApplet>(); - if (web_browser == nullptr) - web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); - - auto main_process = Kernel::Process::Create(system, "main"); - kernel.MakeCurrentProcess(main_process.get()); + applet_manager.SetDefaultAppletsIfMissing(); telemetry_session = std::make_unique<Core::TelemetrySession>(); service_manager = std::make_shared<Service::SM::ServiceManager>(); @@ -134,15 +128,9 @@ struct System::Impl { return ResultStatus::ErrorVideoCore; } - is_powered_on = true; + gpu_core = VideoCore::CreateGPU(system); - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer); - } else { - gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer); - } - - cpu_core_manager.Initialize(system); + is_powered_on = true; LOG_DEBUG(Core, "Initialized OK"); @@ -179,7 +167,8 @@ struct System::Impl { return init_result; } - const Loader::ResultStatus load_result{app_loader->Load(*kernel.CurrentProcess())}; + auto main_process = Kernel::Process::Create(system, "main"); + const auto [load_result, load_parameters] = app_loader->Load(*main_process); if (load_result != Loader::ResultStatus::Success) { LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result)); Shutdown(); @@ -187,6 +176,16 @@ struct System::Impl { return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + static_cast<u32>(load_result)); } + kernel.MakeCurrentProcess(main_process.get()); + + // Main process has been loaded and been made current. + // Begin GPU and CPU execution. + gpu_core->Start(); + cpu_core_manager.StartThreads(); + + // All threads are started, begin main process execution, now that we're in the clear. + main_process->Run(load_parameters->main_thread_priority, + load_parameters->main_thread_stack_size); status = ResultStatus::Success; return status; @@ -224,9 +223,7 @@ struct System::Impl { app_loader.reset(); // Clear all applets - profile_selector.reset(); - software_keyboard.reset(); - web_browser.reset(); + applet_manager.ClearAll(); LOG_DEBUG(Core, "Shutdown OK"); } @@ -265,9 +262,7 @@ struct System::Impl { std::unique_ptr<FileSys::CheatEngine> cheat_engine; /// Frontend applets - std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector; - std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard; - std::unique_ptr<Core::Frontend::WebBrowserApplet> web_browser; + Service::AM::Applets::AppletManager applet_manager; /// Service manager std::shared_ptr<Service::SM::ServiceManager> service_manager; @@ -477,20 +472,20 @@ std::shared_ptr<FileSys::VfsFilesystem> System::GetFilesystem() const { return impl->virtual_filesystem; } -void System::SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet) { - impl->profile_selector = std::move(applet); +void System::SetAppletFrontendSet(Service::AM::Applets::AppletFrontendSet&& set) { + impl->applet_manager.SetAppletFrontendSet(std::move(set)); } -const Frontend::ProfileSelectApplet& System::GetProfileSelector() const { - return *impl->profile_selector; +void System::SetDefaultAppletFrontendSet() { + impl->applet_manager.SetDefaultAppletFrontendSet(); } -void System::SetSoftwareKeyboard(std::unique_ptr<Frontend::SoftwareKeyboardApplet> applet) { - impl->software_keyboard = std::move(applet); +Service::AM::Applets::AppletManager& System::GetAppletManager() { + return impl->applet_manager; } -const Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const { - return *impl->software_keyboard; +const Service::AM::Applets::AppletManager& System::GetAppletManager() const { + return impl->applet_manager; } void System::SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider) { @@ -514,18 +509,6 @@ void System::ClearContentProvider(FileSys::ContentProviderUnionSlot slot) { impl->content_provider->ClearSlot(slot); } -void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) { - impl->web_browser = std::move(applet); -} - -Frontend::WebBrowserApplet& System::GetWebBrowser() { - return *impl->web_browser; -} - -const Frontend::WebBrowserApplet& System::GetWebBrowser() const { - return *impl->web_browser; -} - System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { return impl->Init(*this, emu_window); } diff --git a/src/core/core.h b/src/core/core.h index 82b2e087e..a9a756a4c 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -14,9 +14,6 @@ namespace Core::Frontend { class EmuWindow; -class ProfileSelectApplet; -class SoftwareKeyboardApplet; -class WebBrowserApplet; } // namespace Core::Frontend namespace FileSys { @@ -38,9 +35,18 @@ class AppLoader; enum class ResultStatus : u16; } // namespace Loader -namespace Service::SM { +namespace Service { + +namespace AM::Applets { +struct AppletFrontendSet; +class AppletManager; +} // namespace AM::Applets + +namespace SM { class ServiceManager; -} // namespace Service::SM +} // namespace SM + +} // namespace Service namespace Tegra { class DebugContext; @@ -260,18 +266,13 @@ public: void RegisterCheatList(const std::vector<FileSys::CheatList>& list, const std::string& build_id, VAddr code_region_start, VAddr code_region_end); - void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet); - - const Frontend::ProfileSelectApplet& GetProfileSelector() const; - - void SetSoftwareKeyboard(std::unique_ptr<Frontend::SoftwareKeyboardApplet> applet); + void SetAppletFrontendSet(Service::AM::Applets::AppletFrontendSet&& set); - const Frontend::SoftwareKeyboardApplet& GetSoftwareKeyboard() const; + void SetDefaultAppletFrontendSet(); - void SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet); + Service::AM::Applets::AppletManager& GetAppletManager(); - Frontend::WebBrowserApplet& GetWebBrowser(); - const Frontend::WebBrowserApplet& GetWebBrowser() const; + const Service::AM::Applets::AppletManager& GetAppletManager() const; void SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider); diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 93bc5619c..8fcb4eeb1 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp @@ -19,17 +19,19 @@ void RunCpuCore(const System& system, Cpu& cpu_state) { } } // Anonymous namespace -CpuCoreManager::CpuCoreManager() = default; +CpuCoreManager::CpuCoreManager(System& system) : system{system} {} CpuCoreManager::~CpuCoreManager() = default; -void CpuCoreManager::Initialize(System& system) { +void CpuCoreManager::Initialize() { barrier = std::make_unique<CpuBarrier>(); exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); for (std::size_t index = 0; index < cores.size(); ++index) { cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); } +} +void CpuCoreManager::StartThreads() { // Create threads for CPU cores 1-3, and build thread_to_cpu map // CPU core 0 is run on the main thread thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h index a4d70ec56..2cbbf8216 100644 --- a/src/core/cpu_core_manager.h +++ b/src/core/cpu_core_manager.h @@ -18,7 +18,7 @@ class System; class CpuCoreManager { public: - CpuCoreManager(); + explicit CpuCoreManager(System& system); CpuCoreManager(const CpuCoreManager&) = delete; CpuCoreManager(CpuCoreManager&&) = delete; @@ -27,7 +27,8 @@ public: CpuCoreManager& operator=(const CpuCoreManager&) = delete; CpuCoreManager& operator=(CpuCoreManager&&) = delete; - void Initialize(System& system); + void Initialize(); + void StartThreads(); void Shutdown(); Cpu& GetCore(std::size_t index); @@ -54,6 +55,8 @@ private: /// Map of guest threads to CPU cores std::map<std::thread::id, Cpu*> thread_to_cpu; + + System& system; }; } // namespace Core diff --git a/src/core/frontend/applets/error.cpp b/src/core/frontend/applets/error.cpp new file mode 100644 index 000000000..4002a9211 --- /dev/null +++ b/src/core/frontend/applets/error.cpp @@ -0,0 +1,34 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/frontend/applets/error.h" + +namespace Core::Frontend { + +ErrorApplet::~ErrorApplet() = default; + +void DefaultErrorApplet::ShowError(ResultCode error, std::function<void()> finished) const { + LOG_CRITICAL(Service_Fatal, "Application requested error display: {:04}-{:04} (raw={:08X})", + static_cast<u32>(error.module.Value()), error.description.Value(), error.raw); +} + +void DefaultErrorApplet::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, + std::function<void()> finished) const { + LOG_CRITICAL( + Service_Fatal, + "Application requested error display: {:04X}-{:04X} (raw={:08X}) with timestamp={:016X}", + static_cast<u32>(error.module.Value()), error.description.Value(), error.raw, time.count()); +} + +void DefaultErrorApplet::ShowCustomErrorText(ResultCode error, std::string main_text, + std::string detail_text, + std::function<void()> finished) const { + LOG_CRITICAL(Service_Fatal, + "Application requested custom error with error_code={:04X}-{:04X} (raw={:08X})", + static_cast<u32>(error.module.Value()), error.description.Value(), error.raw); + LOG_CRITICAL(Service_Fatal, " Main Text: {}", main_text); + LOG_CRITICAL(Service_Fatal, " Detail Text: {}", detail_text); +} + +} // namespace Core::Frontend diff --git a/src/core/frontend/applets/error.h b/src/core/frontend/applets/error.h new file mode 100644 index 000000000..699df940d --- /dev/null +++ b/src/core/frontend/applets/error.h @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <functional> + +#include "core/hle/result.h" + +namespace Core::Frontend { + +class ErrorApplet { +public: + virtual ~ErrorApplet(); + + virtual void ShowError(ResultCode error, std::function<void()> finished) const = 0; + + virtual void ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, + std::function<void()> finished) const = 0; + + virtual void ShowCustomErrorText(ResultCode error, std::string dialog_text, + std::string fullscreen_text, + std::function<void()> finished) const = 0; +}; + +class DefaultErrorApplet final : public ErrorApplet { +public: + void ShowError(ResultCode error, std::function<void()> finished) const override; + void ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, + std::function<void()> finished) const override; + void ShowCustomErrorText(ResultCode error, std::string main_text, std::string detail_text, + std::function<void()> finished) const override; +}; + +} // namespace Core::Frontend diff --git a/src/core/frontend/applets/general_frontend.cpp b/src/core/frontend/applets/general_frontend.cpp new file mode 100644 index 000000000..b974f2289 --- /dev/null +++ b/src/core/frontend/applets/general_frontend.cpp @@ -0,0 +1,27 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "core/frontend/applets/general_frontend.h" + +namespace Core::Frontend { + +PhotoViewerApplet::~PhotoViewerApplet() = default; + +DefaultPhotoViewerApplet::~DefaultPhotoViewerApplet() {} + +void DefaultPhotoViewerApplet::ShowPhotosForApplication(u64 title_id, + std::function<void()> finished) const { + LOG_INFO(Service_AM, + "Application requested frontend to display stored photos for title_id={:016X}", + title_id); + finished(); +} + +void DefaultPhotoViewerApplet::ShowAllPhotos(std::function<void()> finished) const { + LOG_INFO(Service_AM, "Application requested frontend to display all stored photos."); + finished(); +} + +} // namespace Core::Frontend diff --git a/src/core/frontend/applets/general_frontend.h b/src/core/frontend/applets/general_frontend.h new file mode 100644 index 000000000..d4506c999 --- /dev/null +++ b/src/core/frontend/applets/general_frontend.h @@ -0,0 +1,28 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include "common/common_types.h" + +namespace Core::Frontend { + +class PhotoViewerApplet { +public: + virtual ~PhotoViewerApplet(); + + virtual void ShowPhotosForApplication(u64 title_id, std::function<void()> finished) const = 0; + virtual void ShowAllPhotos(std::function<void()> finished) const = 0; +}; + +class DefaultPhotoViewerApplet final : public PhotoViewerApplet { +public: + ~DefaultPhotoViewerApplet() override; + + void ShowPhotosForApplication(u64 title_id, std::function<void()> finished) const override; + void ShowAllPhotos(std::function<void()> finished) const override; +}; + +} // namespace Core::Frontend diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index 1320bbe77..eda466a5d 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -10,6 +10,8 @@ namespace Core::Frontend { +GraphicsContext::~GraphicsContext() = default; + class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>, public std::enable_shared_from_this<TouchState> { public: diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 70a522556..e2c290dc1 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -19,6 +19,8 @@ namespace Core::Frontend { */ class GraphicsContext { public: + virtual ~GraphicsContext(); + /// Makes the graphics context current for the caller thread virtual void MakeCurrent() = 0; diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index ac0e1d796..5bb139483 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h @@ -438,7 +438,7 @@ inline float RequestParser::Pop() { template <> inline double RequestParser::Pop() { const u64 value = Pop<u64>(); - float real; + double real; std::memcpy(&real, &value, sizeof(real)); return real; } diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index fe710eb6e..42d9dd844 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -58,7 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread( auto& kernel = Core::System::GetInstance().Kernel(); if (!writable_event) { // Create event if not provided - const auto pair = WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic, "HLE Pause Event: " + reason); writable_event = pair.writable; } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4d58e7c69..757e5f21f 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -46,8 +46,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ bool resume = true; - if (thread->GetStatus() == ThreadStatus::WaitSynchAny || - thread->GetStatus() == ThreadStatus::WaitSynchAll || + if (thread->GetStatus() == ThreadStatus::WaitSynch || thread->GetStatus() == ThreadStatus::WaitHLEEvent) { // Remove the thread from each of its waiting objects' waitlists for (const auto& object : thread->GetWaitObjects()) { @@ -182,7 +181,12 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) { void KernelCore::MakeCurrentProcess(Process* process) { impl->current_process = process; - Memory::SetCurrentPageTable(&process->VMManager().page_table); + + if (process == nullptr) { + return; + } + + Memory::SetCurrentPageTable(*process); } Process* KernelCore::CurrentProcess() { diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index 332876c27..2821176a7 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h @@ -33,8 +33,8 @@ enum class HandleType : u32 { }; enum class ResetType { - OneShot, ///< Reset automatically on object acquisition - Sticky, ///< Never reset automatically + Automatic, ///< Reset automatically on object acquisition + Manual, ///< Never reset automatically }; class Object : NonCopyable { diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 4e94048da..0775a89fb 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -28,21 +28,20 @@ namespace { * * @param owner_process The parent process for the main thread * @param kernel The kernel instance to create the main thread under. - * @param entry_point The address at which the thread should start execution * @param priority The priority to give the main thread */ -void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { - // Initialize new "main" thread - const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); +void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { + const auto& vm_manager = owner_process.VMManager(); + const VAddr entry_point = vm_manager.GetCodeRegionBaseAddress(); + const VAddr stack_top = vm_manager.GetTLSIORegionEndAddress(); auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, owner_process.GetIdealCore(), stack_top, owner_process); SharedPtr<Thread> thread = std::move(thread_res).Unwrap(); // Register 1 must be a handle to the main thread - const Handle guest_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); - thread->SetGuestHandle(guest_handle); - thread->GetContext().cpu_registers[1] = guest_handle; + const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); + thread->GetContext().cpu_registers[1] = thread_handle; // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires thread->ResumeFromWait(); @@ -106,8 +105,6 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { is_64bit_process = metadata.Is64BitProgram(); vm_manager.Reset(metadata.GetAddressSpaceType()); - // Ensure that the potentially resized page table is seen by CPU backends. - Memory::SetCurrentPageTable(&vm_manager.page_table); const auto& caps = metadata.GetKernelCapabilities(); const auto capability_init_result = @@ -119,7 +116,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { return handle_table.SetSize(capabilities.GetHandleTableSize()); } -void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) { +void Process::Run(s32 main_thread_priority, u64 stack_size) { // The kernel always ensures that the given stack size is page aligned. main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); @@ -135,7 +132,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) { vm_manager.LogLayout(); ChangeStatus(ProcessStatus::Running); - SetupMainThread(*this, kernel, entry_point, main_thread_priority); + SetupMainThread(*this, kernel, main_thread_priority); } void Process::PrepareForTermination() { @@ -150,8 +147,7 @@ void Process::PrepareForTermination() { continue; // TODO(Subv): When are the other running/ready threads terminated? - ASSERT_MSG(thread->GetStatus() == ThreadStatus::WaitSynchAny || - thread->GetStatus() == ThreadStatus::WaitSynchAll, + ASSERT_MSG(thread->GetStatus() == ThreadStatus::WaitSynch, "Exiting processes with non-waiting threads is currently unimplemented"); thread->Stop(); @@ -242,13 +238,11 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); code_memory_size += module_.memory.size(); - - // Clear instruction cache in CPU JIT - system.InvalidateCpuInstructionCaches(); } Process::Process(Core::System& system) - : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {} + : WaitObject{system.Kernel()}, vm_manager{system}, + address_arbiter{system}, mutex{system}, system{system} {} Process::~Process() = default; diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index dda52f4c0..bf3b7eef3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -225,9 +225,12 @@ public: ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata); /** - * Applies address space changes and launches the process main thread. + * Starts the main application thread for this process. + * + * @param main_thread_priority The priority for the main thread. + * @param stack_size The stack size for the main thread in bytes. */ - void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size); + void Run(s32 main_thread_priority, u64 stack_size); /** * Prepares a process for termination by stopping all of its threads diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index c2b798a4e..06463cd26 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -21,8 +21,9 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const { void ReadableEvent::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); - if (reset_type == ResetType::OneShot) + if (reset_type == ResetType::Automatic) { signaled = false; + } } void ReadableEvent::Signal() { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index a5bd92433..5a5851f66 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -424,7 +424,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han /// Default thread wakeup callback for WaitSynchronization static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thread> thread, SharedPtr<WaitObject> object, std::size_t index) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitSynchAny); + ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); if (reason == ThreadWakeupReason::Timeout) { thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); @@ -502,7 +502,7 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr } thread->SetWaitObjects(std::move(objects)); - thread->SetStatus(ThreadStatus::WaitSynchAny); + thread->SetStatus(ThreadStatus::WaitSynch); // Create an event to wake the thread up after the specified nanosecond delay has passed thread->WakeAfterDelay(nano_seconds); @@ -518,16 +518,14 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); - const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); + SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", thread_handle); return ERR_INVALID_HANDLE; } - ASSERT(thread->GetStatus() == ThreadStatus::WaitSynchAny); - thread->SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); - thread->ResumeFromWait(); + thread->CancelWait(); return RESULT_SUCCESS; } @@ -1189,6 +1187,142 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address, query_address); } +static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, + u64 src_address, u64 size) { + LOG_DEBUG(Kernel_SVC, + "called. process_handle=0x{:08X}, dst_address=0x{:016X}, " + "src_address=0x{:016X}, size=0x{:016X}", + process_handle, dst_address, src_address, size); + + if (!Common::Is4KBAligned(src_address)) { + LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", + src_address); + return ERR_INVALID_ADDRESS; + } + + if (!Common::Is4KBAligned(dst_address)) { + LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", + dst_address); + return ERR_INVALID_ADDRESS; + } + + if (size == 0 || !Common::Is4KBAligned(size)) { + LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); + return ERR_INVALID_SIZE; + } + + if (!IsValidAddressRange(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range overflows the address space (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!IsValidAddressRange(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range overflows the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto process = handle_table.Get<Process>(process_handle); + if (!process) { + LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", + process_handle); + return ERR_INVALID_HANDLE; + } + + auto& vm_manager = process->VMManager(); + if (!vm_manager.IsWithinAddressSpace(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range is not within the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!vm_manager.IsWithinASLRRegion(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_MEMORY_RANGE; + } + + return vm_manager.MapCodeMemory(dst_address, src_address, size); +} + +static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, + u64 dst_address, u64 src_address, u64 size) { + LOG_DEBUG(Kernel_SVC, + "called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, " + "size=0x{:016X}", + process_handle, dst_address, src_address, size); + + if (!Common::Is4KBAligned(dst_address)) { + LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", + dst_address); + return ERR_INVALID_ADDRESS; + } + + if (!Common::Is4KBAligned(src_address)) { + LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", + src_address); + return ERR_INVALID_ADDRESS; + } + + if (size == 0 || Common::Is4KBAligned(size)) { + LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); + return ERR_INVALID_SIZE; + } + + if (!IsValidAddressRange(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range overflows the address space (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!IsValidAddressRange(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range overflows the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto process = handle_table.Get<Process>(process_handle); + if (!process) { + LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", + process_handle); + return ERR_INVALID_HANDLE; + } + + auto& vm_manager = process->VMManager(); + if (!vm_manager.IsWithinAddressSpace(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range is not within the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!vm_manager.IsWithinASLRRegion(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_MEMORY_RANGE; + } + + return vm_manager.UnmapCodeMemory(dst_address, src_address, size); +} + /// Exits the current process static void ExitProcess(Core::System& system) { auto* current_process = system.Kernel().CurrentProcess(); @@ -1244,20 +1378,22 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e return ERR_INVALID_THREAD_PRIORITY; } - const std::string name = fmt::format("thread-{:X}", entry_point); auto& kernel = system.Kernel(); CASCADE_RESULT(SharedPtr<Thread> thread, - Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top, + Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top, *current_process)); - const auto new_guest_handle = current_process->GetHandleTable().Create(thread); - if (new_guest_handle.Failed()) { + const auto new_thread_handle = current_process->GetHandleTable().Create(thread); + if (new_thread_handle.Failed()) { LOG_ERROR(Kernel_SVC, "Failed to create handle with error=0x{:X}", - new_guest_handle.Code().raw); - return new_guest_handle.Code(); + new_thread_handle.Code().raw); + return new_thread_handle.Code(); } - thread->SetGuestHandle(*new_guest_handle); - *out_handle = *new_guest_handle; + *out_handle = *new_thread_handle; + + // Set the thread name for debugging purposes. + thread->SetName( + fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); @@ -1851,7 +1987,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle auto& kernel = system.Kernel(); const auto [readable_event, writable_event] = - WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent"); + WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent"); HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable(); @@ -2054,8 +2190,8 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, return RESULT_SUCCESS; } -ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids, - u32 out_thread_ids_size, Handle debug_handle) { +static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids, + u32 out_thread_ids_size, Handle debug_handle) { // TODO: Handle this case when debug events are supported. UNIMPLEMENTED_IF(debug_handle != InvalidHandle); @@ -2159,7 +2295,7 @@ static const FunctionDef SVC_Table[] = { {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, - {0x36, nullptr, "Unknown"}, + {0x36, nullptr, "SynchronizePreemptionState"}, {0x37, nullptr, "Unknown"}, {0x38, nullptr, "Unknown"}, {0x39, nullptr, "Unknown"}, @@ -2224,8 +2360,8 @@ static const FunctionDef SVC_Table[] = { {0x74, nullptr, "MapProcessMemory"}, {0x75, nullptr, "UnmapProcessMemory"}, {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, - {0x77, nullptr, "MapProcessCodeMemory"}, - {0x78, nullptr, "UnmapProcessCodeMemory"}, + {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, + {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, {0x79, nullptr, "CreateProcess"}, {0x7A, nullptr, "StartProcess"}, {0x7B, nullptr, "TerminateProcess"}, diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index b3690b5f3..865473c6f 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -44,6 +44,13 @@ void SvcWrap(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); } +template <ResultCode func(Core::System&, u32, u64, u64, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), + Param(system, 2), Param(system, 3)) + .raw); +} + template <ResultCode func(Core::System&, u32*)> void SvcWrap(Core::System& system) { u32 param = 0; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 1b891f632..2abf9efca 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -101,8 +101,7 @@ void Thread::ResumeFromWait() { ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects"); switch (status) { - case ThreadStatus::WaitSynchAll: - case ThreadStatus::WaitSynchAny: + case ThreadStatus::WaitSynch: case ThreadStatus::WaitHLEEvent: case ThreadStatus::WaitSleep: case ThreadStatus::WaitIPC: @@ -142,6 +141,12 @@ void Thread::ResumeFromWait() { ChangeScheduler(); } +void Thread::CancelWait() { + ASSERT(GetStatus() == ThreadStatus::WaitSynch); + SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); + ResumeFromWait(); +} + /** * Resets a thread context, making it ready to be scheduled and run by the CPU * @param context Thread context to reset @@ -220,11 +225,6 @@ void Thread::SetPriority(u32 priority) { UpdatePriority(); } -void Thread::BoostPriority(u32 priority) { - scheduler->SetThreadPriority(this, priority); - current_priority = priority; -} - void Thread::SetWaitSynchronizationResult(ResultCode result) { context.cpu_registers[0] = result.raw; } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index c340a801d..b4b9cda7c 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -58,8 +58,7 @@ enum class ThreadStatus { WaitHLEEvent, ///< Waiting for hle event to finish WaitSleep, ///< Waiting due to a SleepThread SVC WaitIPC, ///< Waiting for the reply from an IPC request - WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false - WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true + WaitSynch, ///< Waiting due to WaitSynchronization WaitMutex, ///< Waiting due to an ArbitrateLock svc WaitCondVar, ///< Waiting due to an WaitProcessWideKey svc WaitArb, ///< Waiting due to a SignalToAddress/WaitForAddress svc @@ -111,6 +110,11 @@ public: std::string GetName() const override { return name; } + + void SetName(std::string new_name) { + name = std::move(new_name); + } + std::string GetTypeName() const override { return "Thread"; } @@ -145,12 +149,6 @@ public: */ void SetPriority(u32 priority); - /** - * Temporarily boosts the thread's priority until the next time it is scheduled - * @param priority The new priority - */ - void BoostPriority(u32 priority); - /// Adds a thread to the list of threads that are waiting for a lock held by this thread. void AddMutexWaiter(SharedPtr<Thread> thread); @@ -179,11 +177,17 @@ public: return tls_memory; } - /** - * Resumes a thread from waiting - */ + /// Resumes a thread from waiting void ResumeFromWait(); + /// Cancels a waiting operation that this thread may or may not be within. + /// + /// When the thread is within a waiting state, this will set the thread's + /// waiting result to signal a canceled wait. The function will then resume + /// this thread. + /// + void CancelWait(); + /** * Schedules an event to wake up the specified thread after the specified delay * @param nanoseconds The time this thread will be allowed to sleep for @@ -194,24 +198,27 @@ public: void CancelWakeupTimer(); /** - * Sets the result after the thread awakens (from either WaitSynchronization SVC) + * Sets the result after the thread awakens (from svcWaitSynchronization) * @param result Value to set to the returned result */ void SetWaitSynchronizationResult(ResultCode result); /** - * Sets the output parameter value after the thread awakens (from WaitSynchronizationN SVC only) + * Sets the output parameter value after the thread awakens (from svcWaitSynchronization) * @param output Value to set to the output parameter */ void SetWaitSynchronizationOutput(s32 output); /** * Retrieves the index that this particular object occupies in the list of objects - * that the thread passed to WaitSynchronizationN, starting the search from the last element. - * It is used to set the output value of WaitSynchronizationN when the thread is awakened. + * that the thread passed to WaitSynchronization, starting the search from the last element. + * + * It is used to set the output index of WaitSynchronization when the thread is awakened. + * * When a thread wakes up due to an object signal, the kernel will use the index of the last * matching object in the wait objects list in case of having multiple instances of the same * object in the list. + * * @param object Object to query the index of. */ s32 GetWaitObjectIndex(const WaitObject* object) const; @@ -248,13 +255,9 @@ public: */ VAddr GetCommandBufferAddress() const; - /** - * Returns whether this thread is waiting for all the objects in - * its wait list to become ready, as a result of a WaitSynchronizationN call - * with wait_all = true. - */ - bool IsSleepingOnWaitAll() const { - return status == ThreadStatus::WaitSynchAll; + /// Returns whether this thread is waiting on objects from a WaitSynchronization call. + bool IsSleepingOnWait() const { + return status == ThreadStatus::WaitSynch; } ThreadContext& GetContext() { @@ -354,10 +357,6 @@ public: arb_wait_address = address; } - void SetGuestHandle(Handle handle) { - guest_handle = handle; - } - bool HasWakeupCallback() const { return wakeup_callback != nullptr; } @@ -432,7 +431,7 @@ private: Process* owner_process; /// Objects that the thread is waiting on, in the same order as they were - /// passed to WaitSynchronization1/N. + /// passed to WaitSynchronization. ThreadWaitObjects wait_objects; /// List of threads that are waiting for a mutex that is held by this thread. @@ -451,14 +450,11 @@ private: /// If waiting for an AddressArbiter, this is the address being waited on. VAddr arb_wait_address{0}; - /// Handle used by guest emulated application to access this thread - Handle guest_handle = 0; - /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. Handle callback_handle = 0; /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread - /// was waiting via WaitSynchronizationN then the object will be the last object that became + /// was waiting via WaitSynchronization then the object will be the last object that became /// available. In case of a timeout, the object will be nullptr. WakeupCallback wakeup_callback; diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index ec0a480ce..48b13cfdd 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -62,7 +62,7 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { return true; } -VMManager::VMManager() { +VMManager::VMManager(Core::System& system) : system{system} { // Default to assuming a 39-bit address space. This way we have a sane // starting point with executables that don't provide metadata. Reset(FileSys::ProgramAddressSpaceType::Is39Bit); @@ -111,7 +111,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, VirtualMemoryArea& final_vma = vma_handle->second; ASSERT(final_vma.size == size); - auto& system = Core::System::GetInstance(); system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset, VMAPermission::ReadWriteExecute); system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset, @@ -140,7 +139,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me VirtualMemoryArea& final_vma = vma_handle->second; ASSERT(final_vma.size == size); - auto& system = Core::System::GetInstance(); system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); @@ -223,7 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) { ASSERT(FindVMA(target)->second.size >= size); - auto& system = Core::System::GetInstance(); system.ArmInterface(0).UnmapMemory(target, size); system.ArmInterface(1).UnmapMemory(target, size); system.ArmInterface(2).UnmapMemory(target, size); @@ -302,6 +299,86 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { return MakeResult<VAddr>(heap_region_base); } +ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { + constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; + const auto src_check_result = CheckRangeState( + src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::All, + VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); + + if (src_check_result.Failed()) { + return src_check_result.Code(); + } + + const auto mirror_result = + MirrorMemory(dst_address, src_address, size, MemoryState::ModuleCode); + if (mirror_result.IsError()) { + return mirror_result; + } + + // Ensure we lock the source memory region. + const auto src_vma_result = CarveVMARange(src_address, size); + if (src_vma_result.Failed()) { + return src_vma_result.Code(); + } + auto src_vma_iter = *src_vma_result; + src_vma_iter->second.attribute = MemoryAttribute::Locked; + Reprotect(src_vma_iter, VMAPermission::Read); + + // The destination memory region is fine as is, however we need to make it read-only. + return ReprotectRange(dst_address, size, VMAPermission::Read); +} + +ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { + constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; + const auto src_check_result = CheckRangeState( + src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::None, + VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked, ignore_attribute); + + if (src_check_result.Failed()) { + return src_check_result.Code(); + } + + // Yes, the kernel only checks the first page of the region. + const auto dst_check_result = + CheckRangeState(dst_address, Memory::PAGE_SIZE, MemoryState::FlagModule, + MemoryState::FlagModule, VMAPermission::None, VMAPermission::None, + MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); + + if (dst_check_result.Failed()) { + return dst_check_result.Code(); + } + + const auto dst_memory_state = std::get<MemoryState>(*dst_check_result); + const auto dst_contiguous_check_result = CheckRangeState( + dst_address, size, MemoryState::All, dst_memory_state, VMAPermission::None, + VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); + + if (dst_contiguous_check_result.Failed()) { + return dst_contiguous_check_result.Code(); + } + + const auto unmap_result = UnmapRange(dst_address, size); + if (unmap_result.IsError()) { + return unmap_result; + } + + // With the mirrored portion unmapped, restore the original region's traits. + const auto src_vma_result = CarveVMARange(src_address, size); + if (src_vma_result.Failed()) { + return src_vma_result.Code(); + } + auto src_vma_iter = *src_vma_result; + src_vma_iter->second.state = MemoryState::Heap; + src_vma_iter->second.attribute = MemoryAttribute::None; + Reprotect(src_vma_iter, VMAPermission::ReadWrite); + + if (dst_memory_state == MemoryState::ModuleCode) { + system.InvalidateCpuInstructionCaches(); + } + + return unmap_result; +} + MemoryInfo VMManager::QueryMemory(VAddr address) const { const auto vma = FindVMA(address); MemoryInfo memory_info{}; diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 6f484b7bf..ec84d9a70 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -14,6 +14,10 @@ #include "core/hle/result.h" #include "core/memory.h" +namespace Core { +class System; +} + namespace FileSys { enum class ProgramAddressSpaceType : u8; } @@ -43,6 +47,9 @@ enum class VMAPermission : u8 { ReadExecute = Read | Execute, WriteExecute = Write | Execute, ReadWriteExecute = Read | Write | Execute, + + // Used as a wildcard when checking permissions across memory ranges + All = 0xFF, }; constexpr VMAPermission operator|(VMAPermission lhs, VMAPermission rhs) { @@ -152,6 +159,9 @@ enum class MemoryState : u32 { FlagUncached = 1U << 24, FlagCodeMemory = 1U << 25, + // Wildcard used in range checking to indicate all states. + All = 0xFFFFFFFF, + // Convenience flag sets to reduce repetition IPCFlags = FlagIPC0 | FlagIPC3 | FlagIPC1, @@ -315,7 +325,7 @@ class VMManager final { public: using VMAHandle = VMAMap::const_iterator; - VMManager(); + explicit VMManager(Core::System& system); ~VMManager(); /// Clears the address space map, re-initializing with a single free area. @@ -415,6 +425,49 @@ public: /// ResultVal<VAddr> SetHeapSize(u64 size); + /// Maps a region of memory as code memory. + /// + /// @param dst_address The base address of the region to create the aliasing memory region. + /// @param src_address The base address of the region to be aliased. + /// @param size The total amount of memory to map in bytes. + /// + /// @pre Both memory regions lie within the actual addressable address space. + /// + /// @post After this function finishes execution, assuming success, then the address range + /// [dst_address, dst_address+size) will alias the memory region, + /// [src_address, src_address+size). + /// <p> + /// What this also entails is as follows: + /// 1. The aliased region gains the Locked memory attribute. + /// 2. The aliased region becomes read-only. + /// 3. The aliasing region becomes read-only. + /// 4. The aliasing region is created with a memory state of MemoryState::CodeModule. + /// + ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size); + + /// Unmaps a region of memory designated as code module memory. + /// + /// @param dst_address The base address of the memory region aliasing the source memory region. + /// @param src_address The base address of the memory region being aliased. + /// @param size The size of the memory region to unmap in bytes. + /// + /// @pre Both memory ranges lie within the actual addressable address space. + /// + /// @pre The memory region being unmapped has been previously been mapped + /// by a call to MapCodeMemory. + /// + /// @post After execution of the function, if successful. the aliasing memory region + /// will be unmapped and the aliased region will have various traits about it + /// restored to what they were prior to the original mapping call preceding + /// this function call. + /// <p> + /// What this also entails is as follows: + /// 1. The state of the memory region will now indicate a general heap region. + /// 2. All memory attributes for the memory region are cleared. + /// 3. Memory permissions for the region are restored to user read/write. + /// + ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size); + /// Queries the memory manager for information about the given address. /// /// @param address The address to query the memory manager about for information. @@ -663,5 +716,7 @@ private: // The end of the currently allocated heap. This is not an inclusive // end of the range. This is essentially 'base_address + current_size'. VAddr heap_end = 0; + + Core::System& system; }; } // namespace Kernel diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 90580ed93..0e96ba872 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -30,7 +30,7 @@ void WaitObject::RemoveWaitingThread(Thread* thread) { waiting_threads.erase(itr); } -SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() { +SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() const { Thread* candidate = nullptr; u32 candidate_priority = THREADPRIO_LOWEST + 1; @@ -38,8 +38,7 @@ SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() { const ThreadStatus thread_status = thread->GetStatus(); // The list of waiting threads must not contain threads that are not waiting to be awakened. - ASSERT_MSG(thread_status == ThreadStatus::WaitSynchAny || - thread_status == ThreadStatus::WaitSynchAll || + ASSERT_MSG(thread_status == ThreadStatus::WaitSynch || thread_status == ThreadStatus::WaitHLEEvent, "Inconsistent thread statuses in waiting_threads"); @@ -49,10 +48,10 @@ SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() { if (ShouldWait(thread.get())) continue; - // A thread is ready to run if it's either in ThreadStatus::WaitSynchAny or - // in ThreadStatus::WaitSynchAll and the rest of the objects it is waiting on are ready. + // A thread is ready to run if it's either in ThreadStatus::WaitSynch + // and the rest of the objects it is waiting on are ready. bool ready_to_run = true; - if (thread_status == ThreadStatus::WaitSynchAll) { + if (thread_status == ThreadStatus::WaitSynch) { ready_to_run = thread->AllWaitObjectsReady(); } @@ -68,33 +67,35 @@ SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() { void WaitObject::WakeupWaitingThread(SharedPtr<Thread> thread) { ASSERT(!ShouldWait(thread.get())); - if (!thread) + if (!thread) { return; + } - if (!thread->IsSleepingOnWaitAll()) { - Acquire(thread.get()); - } else { + if (thread->IsSleepingOnWait()) { for (const auto& object : thread->GetWaitObjects()) { ASSERT(!object->ShouldWait(thread.get())); object->Acquire(thread.get()); } + } else { + Acquire(thread.get()); } const std::size_t index = thread->GetWaitObjectIndex(this); - for (const auto& object : thread->GetWaitObjects()) + for (const auto& object : thread->GetWaitObjects()) { object->RemoveWaitingThread(thread.get()); + } thread->ClearWaitObjects(); thread->CancelWakeupTimer(); bool resume = true; - - if (thread->HasWakeupCallback()) + if (thread->HasWakeupCallback()) { resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Signal, thread, this, index); - - if (resume) + } + if (resume) { thread->ResumeFromWait(); + } } void WaitObject::WakeupAllWaitingThreads() { diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h index 04464a51a..3271a30a7 100644 --- a/src/core/hle/kernel/wait_object.h +++ b/src/core/hle/kernel/wait_object.h @@ -54,7 +54,7 @@ public: void WakeupWaitingThread(SharedPtr<Thread> thread); /// Obtains the highest priority thread that is ready to run from this object's waiting list. - SharedPtr<Thread> GetHighestPriorityReadyThread(); + SharedPtr<Thread> GetHighestPriorityReadyThread() const; /// Get a const reference to the waiting threads list for debug use const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const; diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 1f8ed265e..ba7d7acbd 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -137,6 +137,7 @@ private: class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { public: IManagerForApplication() : ServiceFramework("IManagerForApplication") { + // clang-format off static const FunctionInfo functions[] = { {0, &IManagerForApplication::CheckAvailability, "CheckAvailability"}, {1, &IManagerForApplication::GetAccountId, "GetAccountId"}, @@ -145,7 +146,10 @@ public: {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"}, {150, nullptr, "CreateAuthorizationRequest"}, {160, nullptr, "StoreOpenContext"}, + {170, nullptr, "LoadNetworkServiceLicenseKindAsync"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 5e2030355..d66233cad 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -8,6 +8,7 @@ namespace Service::Account { ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager) : Module::Interface(std::move(module), std::move(profile_manager), "acc:su") { + // clang-format off static const FunctionInfo functions[] = { {0, &ACC_SU::GetUserCount, "GetUserCount"}, {1, &ACC_SU::GetUserExistence, "GetUserExistence"}, @@ -19,6 +20,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, {60, nullptr, "ListOpenContextStoredUsers"}, + {99, nullptr, "DebugActivateOpenContextRetention"}, {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, {102, nullptr, "GetBaasAccountManagerForSystemService"}, @@ -29,6 +31,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {111, nullptr, "ClearSaveDataThumbnail"}, {112, nullptr, "LoadSaveDataThumbnail"}, {113, nullptr, "GetSaveDataThumbnailExistence"}, + {130, nullptr, "ActivateOpenContextRetention"}, + {140, nullptr, "ListQualifiedUsers"}, {190, nullptr, "GetUserLastOpenedApplication"}, {191, nullptr, "ActivateOpenContextHolder"}, {200, nullptr, "BeginUserRegistration"}, @@ -48,6 +52,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {998, nullptr, "DebugSetUserStateClose"}, {999, nullptr, "DebugSetUserStateOpen"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp index a4d705b45..182f7c7e5 100644 --- a/src/core/hle/service/acc/acc_u0.cpp +++ b/src/core/hle/service/acc/acc_u0.cpp @@ -8,6 +8,7 @@ namespace Service::Account { ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager) : Module::Interface(std::move(module), std::move(profile_manager), "acc:u0") { + // clang-format off static const FunctionInfo functions[] = { {0, &ACC_U0::GetUserCount, "GetUserCount"}, {1, &ACC_U0::GetUserExistence, "GetUserExistence"}, @@ -19,6 +20,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, {60, nullptr, "ListOpenContextStoredUsers"}, + {99, nullptr, "DebugActivateOpenContextRetention"}, {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"}, {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, {102, nullptr, "AuthenticateApplicationAsync"}, @@ -27,7 +29,13 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {111, nullptr, "ClearSaveDataThumbnail"}, {120, nullptr, "CreateGuestLoginRequest"}, {130, nullptr, "LoadOpenContext"}, + {131, nullptr, "ListOpenContextStoredUsers"}, + {140, nullptr, "InitializeApplicationInfo"}, + {141, nullptr, "ListQualifiedUsers"}, + {150, nullptr, "IsUserAccountSwitchLocked"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp index 8fffc93b5..2dd17d935 100644 --- a/src/core/hle/service/acc/acc_u1.cpp +++ b/src/core/hle/service/acc/acc_u1.cpp @@ -8,6 +8,7 @@ namespace Service::Account { ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager) : Module::Interface(std::move(module), std::move(profile_manager), "acc:u1") { + // clang-format off static const FunctionInfo functions[] = { {0, &ACC_U1::GetUserCount, "GetUserCount"}, {1, &ACC_U1::GetUserExistence, "GetUserExistence"}, @@ -19,6 +20,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, {60, nullptr, "ListOpenContextStoredUsers"}, + {99, nullptr, "DebugActivateOpenContextRetention"}, {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, {102, nullptr, "GetBaasAccountManagerForSystemService"}, @@ -29,12 +31,16 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {111, nullptr, "ClearSaveDataThumbnail"}, {112, nullptr, "LoadSaveDataThumbnail"}, {113, nullptr, "GetSaveDataThumbnailExistence"}, + {130, nullptr, "ActivateOpenContextRetention"}, + {140, nullptr, "ListQualifiedUsers"}, {190, nullptr, "GetUserLastOpenedApplication"}, {191, nullptr, "ActivateOpenContextHolder"}, {997, nullptr, "DebugInvalidateTokenCacheForUser"}, {998, nullptr, "DebugSetUserStateClose"}, {999, nullptr, "DebugSetUserStateOpen"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 85271d418..1a32a109f 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -22,7 +22,6 @@ #include "core/hle/service/am/applets/applets.h" #include "core/hle/service/am/applets/profile_select.h" #include "core/hle/service/am/applets/software_keyboard.h" -#include "core/hle/service/am/applets/stub_applet.h" #include "core/hle/service/am/applets/web_browser.h" #include "core/hle/service/am/idle.h" #include "core/hle/service/am/omm.h" @@ -42,12 +41,6 @@ constexpr ResultCode ERR_NO_DATA_IN_CHANNEL{ErrorModule::AM, 0x2}; constexpr ResultCode ERR_NO_MESSAGES{ErrorModule::AM, 0x3}; constexpr ResultCode ERR_SIZE_OUT_OF_BOUNDS{ErrorModule::AM, 0x1F7}; -enum class AppletId : u32 { - ProfileSelect = 0x10, - SoftwareKeyboard = 0x11, - LibAppletOff = 0x17, -}; - constexpr u32 POP_LAUNCH_PARAMETER_MAGIC = 0xC79497CA; struct LaunchParameters { @@ -224,6 +217,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} { {20, nullptr, "InvalidateTransitionLayer"}, {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"}, {40, nullptr, "GetAppletResourceUsageInfo"}, + {41, nullptr, "SetCpuBoostModeForApplet"}, }; // clang-format on @@ -256,6 +250,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"}, {41, nullptr, "IsSystemBufferSharingEnabled"}, {42, nullptr, "GetSystemSharedLayerHandle"}, + {43, nullptr, "GetSystemSharedBufferHandle"}, {50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"}, {51, nullptr, "ApproveToDisplay"}, {60, nullptr, "OverrideAutoSleepTimeAndDimmingTime"}, @@ -269,9 +264,11 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger {68, nullptr, "SetAutoSleepDisabled"}, {69, nullptr, "IsAutoSleepDisabled"}, {70, nullptr, "ReportMultimediaError"}, + {71, nullptr, "GetCurrentIlluminanceEx"}, {80, nullptr, "SetWirelessPriorityMode"}, {90, nullptr, "GetAccumulatedSuspendedTickValue"}, {91, nullptr, "GetAccumulatedSuspendedTickChangedEvent"}, + {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, {1000, nullptr, "GetDebugStorageChannel"}, }; // clang-format on @@ -279,7 +276,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, + launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "ISelfController:LaunchableEvent"); } @@ -445,10 +442,10 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c AppletMessageQueue::AppletMessageQueue() { auto& kernel = Core::System::GetInstance().Kernel(); - on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, + on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "AMMessageQueue:OnMessageRecieved"); on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, "AMMessageQueue:OperationModeChanged"); + kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged"); } AppletMessageQueue::~AppletMessageQueue() = default; @@ -516,11 +513,20 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q {50, nullptr, "IsVrModeEnabled"}, {51, nullptr, "SetVrModeEnabled"}, {52, nullptr, "SwitchLcdBacklight"}, + {53, nullptr, "BeginVrModeEx"}, + {54, nullptr, "EndVrModeEx"}, {55, nullptr, "IsInControllerFirmwareUpdateSection"}, {60, &ICommonStateGetter::GetDefaultDisplayResolution, "GetDefaultDisplayResolution"}, {61, &ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent, "GetDefaultDisplayResolutionChangeEvent"}, {62, nullptr, "GetHdcpAuthenticationState"}, {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, + {64, nullptr, "SetTvPowerStateMatchingMode"}, + {65, nullptr, "GetApplicationIdByContentActionName"}, + {66, nullptr, "SetCpuBoostMode"}, + {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, + {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, + {91, nullptr, "GetCurrentPerformanceConfiguration"}, + {200, nullptr, "GetOperationModeSystemInfo"}, }; // clang-format on @@ -829,6 +835,7 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_SIZE_OUT_OF_BOUNDS); + return; } std::memcpy(backing.buffer.data() + offset, data.data(), data.size()); @@ -851,6 +858,7 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_SIZE_OUT_OF_BOUNDS); + return; } ctx.WriteBuffer(backing.buffer.data() + offset, size); @@ -873,30 +881,16 @@ ILibraryAppletCreator::ILibraryAppletCreator() : ServiceFramework("ILibraryApple ILibraryAppletCreator::~ILibraryAppletCreator() = default; -static std::shared_ptr<Applets::Applet> GetAppletFromId(AppletId id) { - switch (id) { - case AppletId::ProfileSelect: - return std::make_shared<Applets::ProfileSelect>(); - case AppletId::SoftwareKeyboard: - return std::make_shared<Applets::SoftwareKeyboard>(); - case AppletId::LibAppletOff: - return std::make_shared<Applets::WebBrowser>(); - default: - LOG_ERROR(Service_AM, "Unimplemented AppletId [{:08X}]! -- Falling back to stub!", - static_cast<u32>(id)); - return std::make_shared<Applets::StubApplet>(); - } -} - void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - const auto applet_id = rp.PopRaw<AppletId>(); + const auto applet_id = rp.PopRaw<Applets::AppletId>(); const auto applet_mode = rp.PopRaw<u32>(); LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", static_cast<u32>(applet_id), applet_mode); - const auto applet = GetAppletFromId(applet_id); + const auto& applet_manager{Core::System::GetInstance().GetAppletManager()}; + const auto applet = applet_manager.GetApplet(applet_id); if (applet == nullptr) { LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); @@ -960,6 +954,8 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF {11, nullptr, "CreateApplicationAndPushAndRequestToStartForQuest"}, {12, nullptr, "CreateApplicationAndRequestToStart"}, {13, &IApplicationFunctions::CreateApplicationAndRequestToStartForQuest, "CreateApplicationAndRequestToStartForQuest"}, + {14, nullptr, "CreateApplicationWithAttributeAndPushAndRequestToStartForQuest"}, + {15, nullptr, "CreateApplicationWithAttributeAndRequestToStartForQuest"}, {20, &IApplicationFunctions::EnsureSaveData, "EnsureSaveData"}, {21, &IApplicationFunctions::GetDesiredLanguage, "GetDesiredLanguage"}, {22, &IApplicationFunctions::SetTerminateResult, "SetTerminateResult"}, @@ -1233,6 +1229,7 @@ IGlobalStateController::IGlobalStateController() : ServiceFramework("IGlobalStat {2, nullptr, "StartSleepSequence"}, {3, nullptr, "StartShutdownSequence"}, {4, nullptr, "StartRebootSequence"}, + {9, nullptr, "IsAutoPowerDownRequested"}, {10, nullptr, "LoadAndApplyIdlePolicySettings"}, {11, nullptr, "NotifyCecSettingsChanged"}, {12, nullptr, "SetDefaultHomeButtonLongPressTime"}, diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index b888f861d..488add8e7 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp @@ -16,6 +16,7 @@ public: std::shared_ptr<AppletMessageQueue> msg_queue) : ServiceFramework("ILibraryAppletProxy"), nvflinger(std::move(nvflinger)), msg_queue(std::move(msg_queue)) { + // clang-format off static const FunctionInfo functions[] = { {0, &ILibraryAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"}, {1, &ILibraryAppletProxy::GetSelfController, "GetSelfController"}, @@ -25,8 +26,11 @@ public: {10, &ILibraryAppletProxy::GetProcessWindingController, "GetProcessWindingController"}, {11, &ILibraryAppletProxy::GetLibraryAppletCreator, "GetLibraryAppletCreator"}, {20, &ILibraryAppletProxy::GetApplicationFunctions, "GetApplicationFunctions"}, + {21, nullptr, "GetAppletCommonFunctions"}, {1000, &ILibraryAppletProxy::GetDebugFunctions, "GetDebugFunctions"}, }; + // clang-format on + RegisterHandlers(functions); } @@ -113,6 +117,7 @@ public: std::shared_ptr<AppletMessageQueue> msg_queue) : ServiceFramework("ISystemAppletProxy"), nvflinger(std::move(nvflinger)), msg_queue(std::move(msg_queue)) { + // clang-format off static const FunctionInfo functions[] = { {0, &ISystemAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"}, {1, &ISystemAppletProxy::GetSelfController, "GetSelfController"}, @@ -124,8 +129,11 @@ public: {20, &ISystemAppletProxy::GetHomeMenuFunctions, "GetHomeMenuFunctions"}, {21, &ISystemAppletProxy::GetGlobalStateController, "GetGlobalStateController"}, {22, &ISystemAppletProxy::GetApplicationCreator, "GetApplicationCreator"}, + {23, nullptr, "GetAppletCommonFunctions"}, {1000, &ISystemAppletProxy::GetDebugFunctions, "GetDebugFunctions"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp index a6064c63f..e812c66e9 100644 --- a/src/core/hle/service/am/applets/applets.cpp +++ b/src/core/hle/service/am/applets/applets.cpp @@ -5,22 +5,32 @@ #include <cstring> #include "common/assert.h" #include "core/core.h" +#include "core/frontend/applets/error.h" +#include "core/frontend/applets/general_frontend.h" +#include "core/frontend/applets/profile_select.h" +#include "core/frontend/applets/software_keyboard.h" +#include "core/frontend/applets/web_browser.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/writable_event.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/applets.h" +#include "core/hle/service/am/applets/error.h" +#include "core/hle/service/am/applets/general_backend.h" +#include "core/hle/service/am/applets/profile_select.h" +#include "core/hle/service/am/applets/software_keyboard.h" +#include "core/hle/service/am/applets/web_browser.h" namespace Service::AM::Applets { AppletDataBroker::AppletDataBroker() { auto& kernel = Core::System::GetInstance().Kernel(); state_changed_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:StateChangedEvent"); + kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent"); pop_out_data_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopDataOutEvent"); + kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent"); pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopInteractiveDataOutEvent"); + kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent"); } AppletDataBroker::~AppletDataBroker() = default; @@ -111,4 +121,76 @@ void Applet::Initialize() { initialized = true; } +AppletManager::AppletManager() = default; + +AppletManager::~AppletManager() = default; + +void AppletManager::SetAppletFrontendSet(AppletFrontendSet set) { + if (set.error != nullptr) + frontend.error = std::move(set.error); + if (set.photo_viewer != nullptr) + frontend.photo_viewer = std::move(set.photo_viewer); + if (set.profile_select != nullptr) + frontend.profile_select = std::move(set.profile_select); + if (set.software_keyboard != nullptr) + frontend.software_keyboard = std::move(set.software_keyboard); + if (set.web_browser != nullptr) + frontend.web_browser = std::move(set.web_browser); +} + +void AppletManager::SetDefaultAppletFrontendSet() { + frontend.error = std::make_unique<Core::Frontend::DefaultErrorApplet>(); + frontend.photo_viewer = std::make_unique<Core::Frontend::DefaultPhotoViewerApplet>(); + frontend.profile_select = std::make_unique<Core::Frontend::DefaultProfileSelectApplet>(); + frontend.software_keyboard = std::make_unique<Core::Frontend::DefaultSoftwareKeyboardApplet>(); + frontend.web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); +} + +void AppletManager::SetDefaultAppletsIfMissing() { + if (frontend.error == nullptr) { + frontend.error = std::make_unique<Core::Frontend::DefaultErrorApplet>(); + } + + if (frontend.photo_viewer == nullptr) { + frontend.photo_viewer = std::make_unique<Core::Frontend::DefaultPhotoViewerApplet>(); + } + + if (frontend.profile_select == nullptr) { + frontend.profile_select = std::make_unique<Core::Frontend::DefaultProfileSelectApplet>(); + } + + if (frontend.software_keyboard == nullptr) { + frontend.software_keyboard = + std::make_unique<Core::Frontend::DefaultSoftwareKeyboardApplet>(); + } + + if (frontend.web_browser == nullptr) { + frontend.web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); + } +} + +void AppletManager::ClearAll() { + frontend = {}; +} + +std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const { + switch (id) { + case AppletId::Error: + return std::make_shared<Error>(*frontend.error); + case AppletId::ProfileSelect: + return std::make_shared<ProfileSelect>(*frontend.profile_select); + case AppletId::SoftwareKeyboard: + return std::make_shared<SoftwareKeyboard>(*frontend.software_keyboard); + case AppletId::PhotoViewer: + return std::make_shared<PhotoViewer>(*frontend.photo_viewer); + case AppletId::LibAppletOff: + return std::make_shared<WebBrowser>(*frontend.web_browser); + default: + UNIMPLEMENTED_MSG( + "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.", + static_cast<u8>(id)); + return std::make_shared<StubApplet>(); + } +} + } // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index 37424c379..7f932672c 100644 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h @@ -12,12 +12,43 @@ union ResultCode; +namespace Core::Frontend { +class ErrorApplet; +class PhotoViewerApplet; +class ProfileSelectApplet; +class SoftwareKeyboardApplet; +class WebBrowserApplet; +} // namespace Core::Frontend + namespace Service::AM { class IStorage; namespace Applets { +enum class AppletId : u32 { + OverlayDisplay = 0x02, + QLaunch = 0x03, + Starter = 0x04, + Auth = 0x0A, + Cabinet = 0x0B, + Controller = 0x0C, + DataErase = 0x0D, + Error = 0x0E, + NetConnect = 0x0F, + ProfileSelect = 0x10, + SoftwareKeyboard = 0x11, + MiiEdit = 0x12, + LibAppletWeb = 0x13, + LibAppletShop = 0x14, + PhotoViewer = 0x15, + Settings = 0x16, + LibAppletOff = 0x17, + LibAppletWhitelisted = 0x18, + LibAppletAuth = 0x19, + MyPage = 0x1A, +}; + class AppletDataBroker final { public: AppletDataBroker(); @@ -105,5 +136,29 @@ protected: bool initialized = false; }; +struct AppletFrontendSet { + std::unique_ptr<Core::Frontend::ErrorApplet> error; + std::unique_ptr<Core::Frontend::PhotoViewerApplet> photo_viewer; + std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_select; + std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard; + std::unique_ptr<Core::Frontend::WebBrowserApplet> web_browser; +}; + +class AppletManager { +public: + AppletManager(); + ~AppletManager(); + + void SetAppletFrontendSet(AppletFrontendSet set); + void SetDefaultAppletFrontendSet(); + void SetDefaultAppletsIfMissing(); + void ClearAll(); + + std::shared_ptr<Applet> GetApplet(AppletId id) const; + +private: + AppletFrontendSet frontend; +}; + } // namespace Applets } // namespace Service::AM diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp new file mode 100644 index 000000000..04774bedc --- /dev/null +++ b/src/core/hle/service/am/applets/error.cpp @@ -0,0 +1,182 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <cstring> +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/string_util.h" +#include "core/core.h" +#include "core/frontend/applets/error.h" +#include "core/hle/service/am/am.h" +#include "core/hle/service/am/applets/error.h" + +namespace Service::AM::Applets { + +#pragma pack(push, 4) +struct ShowError { + u8 mode; + bool jump; + INSERT_PADDING_BYTES(4); + bool use_64bit_error_code; + INSERT_PADDING_BYTES(1); + u64 error_code_64; + u32 error_code_32; +}; +static_assert(sizeof(ShowError) == 0x14, "ShowError has incorrect size."); +#pragma pack(pop) + +struct ShowErrorRecord { + u8 mode; + bool jump; + INSERT_PADDING_BYTES(6); + u64 error_code_64; + u64 posix_time; +}; +static_assert(sizeof(ShowErrorRecord) == 0x18, "ShowErrorRecord has incorrect size."); + +struct SystemErrorArg { + u8 mode; + bool jump; + INSERT_PADDING_BYTES(6); + u64 error_code_64; + std::array<char, 8> language_code; + std::array<char, 0x800> main_text; + std::array<char, 0x800> detail_text; +}; +static_assert(sizeof(SystemErrorArg) == 0x1018, "SystemErrorArg has incorrect size."); + +struct ApplicationErrorArg { + u8 mode; + bool jump; + INSERT_PADDING_BYTES(6); + u32 error_code; + std::array<char, 8> language_code; + std::array<char, 0x800> main_text; + std::array<char, 0x800> detail_text; +}; +static_assert(sizeof(ApplicationErrorArg) == 0x1014, "ApplicationErrorArg has incorrect size."); + +union Error::ErrorArguments { + ShowError error; + ShowErrorRecord error_record; + SystemErrorArg system_error; + ApplicationErrorArg application_error; +}; + +namespace { +template <typename T> +void CopyArgumentData(const std::vector<u8>& data, T& variable) { + ASSERT(data.size() >= sizeof(T)); + std::memcpy(&variable, data.data(), sizeof(T)); +} + +ResultCode Decode64BitError(u64 error) { + const auto description = (error >> 32) & 0x1FFF; + auto module = error & 0x3FF; + if (module >= 2000) + module -= 2000; + module &= 0x1FF; + return {static_cast<ErrorModule>(module), static_cast<u32>(description)}; +} + +} // Anonymous namespace + +Error::Error(const Core::Frontend::ErrorApplet& frontend) : frontend(frontend) {} + +Error::~Error() = default; + +void Error::Initialize() { + Applet::Initialize(); + args = std::make_unique<ErrorArguments>(); + complete = false; + + const auto storage = broker.PopNormalDataToApplet(); + ASSERT(storage != nullptr); + const auto data = storage->GetData(); + + ASSERT(!data.empty()); + std::memcpy(&mode, data.data(), sizeof(ErrorAppletMode)); + + switch (mode) { + case ErrorAppletMode::ShowError: + CopyArgumentData(data, args->error); + if (args->error.use_64bit_error_code) { + error_code = Decode64BitError(args->error.error_code_64); + } else { + error_code = ResultCode(args->error.error_code_32); + } + break; + case ErrorAppletMode::ShowSystemError: + CopyArgumentData(data, args->system_error); + error_code = ResultCode(Decode64BitError(args->system_error.error_code_64)); + break; + case ErrorAppletMode::ShowApplicationError: + CopyArgumentData(data, args->application_error); + error_code = ResultCode(args->application_error.error_code); + break; + case ErrorAppletMode::ShowErrorRecord: + CopyArgumentData(data, args->error_record); + error_code = Decode64BitError(args->error_record.error_code_64); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", static_cast<u8>(mode)); + } +} + +bool Error::TransactionComplete() const { + return complete; +} + +ResultCode Error::GetStatus() const { + return RESULT_SUCCESS; +} + +void Error::ExecuteInteractive() { + UNREACHABLE_MSG("Unexpected interactive applet data!"); +} + +void Error::Execute() { + if (complete) { + return; + } + + const auto callback = [this] { DisplayCompleted(); }; + + switch (mode) { + case ErrorAppletMode::ShowError: + frontend.ShowError(error_code, callback); + break; + case ErrorAppletMode::ShowSystemError: + case ErrorAppletMode::ShowApplicationError: { + const auto system = mode == ErrorAppletMode::ShowSystemError; + const auto& main_text = + system ? args->system_error.main_text : args->application_error.main_text; + const auto& detail_text = + system ? args->system_error.detail_text : args->application_error.detail_text; + + frontend.ShowCustomErrorText( + error_code, + Common::StringFromFixedZeroTerminatedBuffer(main_text.data(), main_text.size()), + Common::StringFromFixedZeroTerminatedBuffer(detail_text.data(), detail_text.size()), + callback); + break; + } + case ErrorAppletMode::ShowErrorRecord: + frontend.ShowErrorWithTimestamp( + error_code, std::chrono::seconds{args->error_record.posix_time}, callback); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", static_cast<u8>(mode)); + DisplayCompleted(); + } +} + +void Error::DisplayCompleted() { + complete = true; + broker.PushNormalDataFromApplet(IStorage{{}}); + broker.SignalStateChanged(); +} + +} // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/error.h b/src/core/hle/service/am/applets/error.h new file mode 100644 index 000000000..a3590d181 --- /dev/null +++ b/src/core/hle/service/am/applets/error.h @@ -0,0 +1,47 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hle/result.h" +#include "core/hle/service/am/applets/applets.h" + +namespace Service::AM::Applets { + +enum class ErrorAppletMode : u8 { + ShowError = 0, + ShowSystemError = 1, + ShowApplicationError = 2, + ShowEula = 3, + ShowErrorPctl = 4, + ShowErrorRecord = 5, + ShowUpdateEula = 8, +}; + +class Error final : public Applet { +public: + explicit Error(const Core::Frontend::ErrorApplet& frontend); + ~Error() override; + + void Initialize() override; + + bool TransactionComplete() const override; + ResultCode GetStatus() const override; + void ExecuteInteractive() override; + void Execute() override; + + void DisplayCompleted(); + +private: + union ErrorArguments; + + const Core::Frontend::ErrorApplet& frontend; + ResultCode error_code = RESULT_SUCCESS; + ErrorAppletMode mode = ErrorAppletMode::ShowError; + std::unique_ptr<ErrorArguments> args; + + bool complete = false; +}; + +} // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/stub_applet.cpp b/src/core/hle/service/am/applets/general_backend.cpp index ed166b87d..c591b9ac2 100644 --- a/src/core/hle/service/am/applets/stub_applet.cpp +++ b/src/core/hle/service/am/applets/general_backend.cpp @@ -4,11 +4,15 @@ #include <string> +#include "common/assert.h" #include "common/hex_util.h" #include "common/logging/log.h" +#include "core/core.h" +#include "core/frontend/applets/general_frontend.h" +#include "core/hle/kernel/process.h" #include "core/hle/result.h" #include "core/hle/service/am/am.h" -#include "core/hle/service/am/applets/stub_applet.h" +#include "core/hle/service/am/applets/general_backend.h" namespace Service::AM::Applets { @@ -30,6 +34,55 @@ static void LogCurrentStorage(AppletDataBroker& broker, std::string prefix) { } } +PhotoViewer::PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend) : frontend(frontend) {} + +PhotoViewer::~PhotoViewer() = default; + +void PhotoViewer::Initialize() { + Applet::Initialize(); + complete = false; + + const auto storage = broker.PopNormalDataToApplet(); + ASSERT(storage != nullptr); + const auto data = storage->GetData(); + ASSERT(!data.empty()); + mode = static_cast<PhotoViewerAppletMode>(data[0]); +} + +bool PhotoViewer::TransactionComplete() const { + return complete; +} + +ResultCode PhotoViewer::GetStatus() const { + return RESULT_SUCCESS; +} + +void PhotoViewer::ExecuteInteractive() { + UNREACHABLE_MSG("Unexpected interactive applet data."); +} + +void PhotoViewer::Execute() { + if (complete) + return; + + const auto callback = [this] { ViewFinished(); }; + switch (mode) { + case PhotoViewerAppletMode::CurrentApp: + frontend.ShowPhotosForApplication(Core::CurrentProcess()->GetTitleID(), callback); + break; + case PhotoViewerAppletMode::AllApps: + frontend.ShowAllPhotos(callback); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", static_cast<u8>(mode)); + } +} + +void PhotoViewer::ViewFinished() { + broker.PushNormalDataFromApplet(IStorage{{}}); + broker.SignalStateChanged(); +} + StubApplet::StubApplet() = default; StubApplet::~StubApplet() = default; @@ -67,4 +120,5 @@ void StubApplet::Execute() { broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)}); broker.SignalStateChanged(); } + } // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/general_backend.h b/src/core/hle/service/am/applets/general_backend.h new file mode 100644 index 000000000..2dd255d7c --- /dev/null +++ b/src/core/hle/service/am/applets/general_backend.h @@ -0,0 +1,48 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hle/service/am/applets/applets.h" + +namespace Service::AM::Applets { + +enum class PhotoViewerAppletMode : u8 { + CurrentApp = 0, + AllApps = 1, +}; + +class PhotoViewer final : public Applet { +public: + explicit PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend); + ~PhotoViewer() override; + + void Initialize() override; + bool TransactionComplete() const override; + ResultCode GetStatus() const override; + void ExecuteInteractive() override; + void Execute() override; + + void ViewFinished(); + +private: + const Core::Frontend::PhotoViewerApplet& frontend; + bool complete = false; + PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp; +}; + +class StubApplet final : public Applet { +public: + StubApplet(); + ~StubApplet() override; + + void Initialize() override; + + bool TransactionComplete() const override; + ResultCode GetStatus() const override; + void ExecuteInteractive() override; + void Execute() override; +}; + +} // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp index 14e2a1fee..d113bd2eb 100644 --- a/src/core/hle/service/am/applets/profile_select.cpp +++ b/src/core/hle/service/am/applets/profile_select.cpp @@ -15,7 +15,9 @@ namespace Service::AM::Applets { constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1}; -ProfileSelect::ProfileSelect() = default; +ProfileSelect::ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend) + : frontend(frontend) {} + ProfileSelect::~ProfileSelect() = default; void ProfileSelect::Initialize() { @@ -51,8 +53,6 @@ void ProfileSelect::Execute() { return; } - const auto& frontend{Core::System::GetInstance().GetProfileSelector()}; - frontend.SelectProfile([this](std::optional<Account::UUID> uuid) { SelectionComplete(uuid); }); } diff --git a/src/core/hle/service/am/applets/profile_select.h b/src/core/hle/service/am/applets/profile_select.h index 787485f22..a2ac6cf50 100644 --- a/src/core/hle/service/am/applets/profile_select.h +++ b/src/core/hle/service/am/applets/profile_select.h @@ -28,7 +28,7 @@ static_assert(sizeof(UserSelectionOutput) == 0x18, "UserSelectionOutput has inco class ProfileSelect final : public Applet { public: - ProfileSelect(); + explicit ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend); ~ProfileSelect() override; void Initialize() override; @@ -41,6 +41,8 @@ public: void SelectionComplete(std::optional<Account::UUID> uuid); private: + const Core::Frontend::ProfileSelectApplet& frontend; + UserSelectionConfig config; bool complete = false; ResultCode status = RESULT_SUCCESS; diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index 8c5bd6059..e197990f7 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -39,7 +39,8 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters( return params; } -SoftwareKeyboard::SoftwareKeyboard() = default; +SoftwareKeyboard::SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend) + : frontend(frontend) {} SoftwareKeyboard::~SoftwareKeyboard() = default; @@ -90,8 +91,6 @@ void SoftwareKeyboard::ExecuteInteractive() { if (status == INTERACTIVE_STATUS_OK) { complete = true; } else { - const auto& frontend{Core::System::GetInstance().GetSoftwareKeyboard()}; - std::array<char16_t, SWKBD_OUTPUT_INTERACTIVE_BUFFER_SIZE / 2 - 2> string; std::memcpy(string.data(), data.data() + 4, string.size() * 2); frontend.SendTextCheckDialog( @@ -106,8 +105,6 @@ void SoftwareKeyboard::Execute() { return; } - const auto& frontend{Core::System::GetInstance().GetSoftwareKeyboard()}; - const auto parameters = ConvertToFrontendParameters(config, initial_text); frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); }, diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h index b93a30d28..0fbc43e51 100644 --- a/src/core/hle/service/am/applets/software_keyboard.h +++ b/src/core/hle/service/am/applets/software_keyboard.h @@ -55,7 +55,7 @@ static_assert(sizeof(KeyboardConfig) == 0x3E0, "KeyboardConfig has incorrect siz class SoftwareKeyboard final : public Applet { public: - SoftwareKeyboard(); + explicit SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend); ~SoftwareKeyboard() override; void Initialize() override; @@ -68,6 +68,8 @@ public: void WriteText(std::optional<std::u16string> text); private: + const Core::Frontend::SoftwareKeyboardApplet& frontend; + KeyboardConfig config; std::u16string initial_text; bool complete = false; diff --git a/src/core/hle/service/am/applets/stub_applet.h b/src/core/hle/service/am/applets/stub_applet.h deleted file mode 100644 index 7d8dc968d..000000000 --- a/src/core/hle/service/am/applets/stub_applet.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "core/hle/service/am/applets/applets.h" - -namespace Service::AM::Applets { - -class StubApplet final : public Applet { -public: - StubApplet(); - ~StubApplet() override; - - void Initialize() override; - - bool TransactionComplete() const override; - ResultCode GetStatus() const override; - void ExecuteInteractive() override; - void Execute() override; -}; - -} // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index 7e17df98a..7878f5136 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp @@ -95,7 +95,7 @@ static FileSys::VirtualFile GetManualRomFS() { return nullptr; } -WebBrowser::WebBrowser() = default; +WebBrowser::WebBrowser(Core::Frontend::WebBrowserApplet& frontend) : frontend(frontend) {} WebBrowser::~WebBrowser() = default; @@ -152,8 +152,6 @@ void WebBrowser::Execute() { return; } - auto& frontend{Core::System::GetInstance().GetWebBrowser()}; - frontend.OpenPage(filename, [this] { UnpackRomFS(); }, [this] { Finalize(); }); } diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h index b9e228fac..7e0f34c7d 100644 --- a/src/core/hle/service/am/applets/web_browser.h +++ b/src/core/hle/service/am/applets/web_browser.h @@ -12,7 +12,7 @@ namespace Service::AM::Applets { class WebBrowser final : public Applet { public: - WebBrowser(); + WebBrowser(Core::Frontend::WebBrowserApplet& frontend); ~WebBrowser() override; void Initialize() override; @@ -32,6 +32,8 @@ public: void Finalize(); private: + Core::Frontend::WebBrowserApplet& frontend; + bool complete = false; bool unpacked = false; ResultCode status = RESULT_SUCCESS; diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index 2d768d9fc..bd4e38461 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -50,6 +50,7 @@ static std::vector<u64> AccumulateAOCTitleIDs() { } AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs()) { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "CountAddOnContentByApplicationId"}, {1, nullptr, "ListAddOnContentByApplicationId"}, @@ -60,11 +61,14 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs {6, nullptr, "PrepareAddOnContentByApplicationId"}, {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, + {100, nullptr, "CreateEcPurchasedEventManager"}, }; + // clang-format on + RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, + aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "GetAddOnContentListChanged:Event"); } diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index fcacbab72..d058c0245 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp @@ -87,6 +87,8 @@ APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { {3, nullptr, "GetLastThrottlingState"}, {4, nullptr, "ClearLastThrottlingState"}, {5, nullptr, "LoadAndApplySettings"}, + {6, nullptr, "SetCpuBoostMode"}, + {7, nullptr, "GetCurrentPerformanceConfiguration"}, }; // clang-format on diff --git a/src/core/hle/service/audio/audctl.cpp b/src/core/hle/service/audio/audctl.cpp index b6b71f966..6a01d4d29 100644 --- a/src/core/hle/service/audio/audctl.cpp +++ b/src/core/hle/service/audio/audctl.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/logging/log.h" +#include "core/hle/ipc_helpers.h" #include "core/hle/service/audio/audctl.h" namespace Service::Audio { @@ -11,8 +13,8 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} { static const FunctionInfo functions[] = { {0, nullptr, "GetTargetVolume"}, {1, nullptr, "SetTargetVolume"}, - {2, nullptr, "GetTargetVolumeMin"}, - {3, nullptr, "GetTargetVolumeMax"}, + {2, &AudCtl::GetTargetVolumeMin, "GetTargetVolumeMin"}, + {3, &AudCtl::GetTargetVolumeMax, "GetTargetVolumeMax"}, {4, nullptr, "IsTargetMute"}, {5, nullptr, "SetTargetMute"}, {6, nullptr, "IsTargetConnected"}, @@ -44,4 +46,28 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} { AudCtl::~AudCtl() = default; +void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Audio, "called."); + + // This service function is currently hardcoded on the + // actual console to this value (as of 8.0.0). + constexpr s32 target_min_volume = 0; + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(target_min_volume); +} + +void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Audio, "called."); + + // This service function is currently hardcoded on the + // actual console to this value (as of 8.0.0). + constexpr s32 target_max_volume = 15; + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(target_max_volume); +} + } // namespace Service::Audio diff --git a/src/core/hle/service/audio/audctl.h b/src/core/hle/service/audio/audctl.h index 9d2d9e83b..c7fafc02e 100644 --- a/src/core/hle/service/audio/audctl.h +++ b/src/core/hle/service/audio/audctl.h @@ -12,6 +12,10 @@ class AudCtl final : public ServiceFramework<AudCtl> { public: explicit AudCtl(); ~AudCtl() override; + +private: + void GetTargetVolumeMin(Kernel::HLERequestContext& ctx); + void GetTargetVolumeMax(Kernel::HLERequestContext& ctx); }; } // namespace Service::Audio diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index e5daefdde..d7f1d348d 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -25,6 +25,7 @@ public: {11, nullptr, "GetAudioInBufferCount"}, {12, nullptr, "SetAudioInDeviceGain"}, {13, nullptr, "GetAudioInDeviceGain"}, + {14, nullptr, "FlushAudioInBuffers"}, }; // clang-format on diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 39acb7b23..6ba41b20a 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -44,7 +44,7 @@ public: std::string&& unique_name) : ServiceFramework("IAudioOut"), audio_core(audio_core), device_name(std::move(device_name)), audio_params(audio_params) { - + // clang-format off static const FunctionInfo functions[] = { {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"}, {1, &IAudioOut::StartAudioOut, "StartAudioOut"}, @@ -58,13 +58,16 @@ public: {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"}, {10, nullptr, "GetAudioOutPlayedSampleCount"}, {11, nullptr, "FlushAudioOutBuffers"}, + {12, nullptr, "SetAudioOutVolume"}, + {13, nullptr, "GetAudioOutVolume"}, }; + // clang-format on RegisterHandlers(functions); // This is the event handle used to check if the audio buffer was released auto& system = Core::System::GetInstance(); buffer_event = Kernel::WritableEvent::CreateEventPair( - system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased"); + system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate, audio_params.channel_count, std::move(unique_name), diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 1dde6edb7..75db0c2dc 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -8,6 +8,7 @@ #include "audio_core/audio_renderer.h" #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_funcs.h" #include "common/logging/log.h" #include "common/string_util.h" @@ -46,7 +47,7 @@ public: auto& system = Core::System::GetInstance(); system_event = Kernel::WritableEvent::CreateEventPair( - system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent"); + system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, system_event.writable); } @@ -178,7 +179,7 @@ public: RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "IAudioOutBufferReleasedEvent"); } @@ -262,64 +263,304 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { OpenAudioRendererImpl(ctx); } +static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) { + // +1 represents the final mix. + return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count + + 1; +} + void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { - IPC::RequestParser rp{ctx}; - auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); LOG_DEBUG(Service_Audio, "called"); - u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); - buffer_sz += params.submix_count * 1024; - buffer_sz += 0x940 * (params.submix_count + 1); - buffer_sz += 0x3F0 * params.voice_count; - buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10); - buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); - buffer_sz += Common::AlignUp( - (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) * - (params.mix_buffer_count + 6), - 0x40); - - if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - const u32 count = params.submix_count + 1; - u64 node_count = Common::AlignUp(count, 0x40); - const u64 node_state_buffer_sz = - 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); - u64 edge_matrix_buffer_sz = 0; - node_count = Common::AlignUp(count * count, 0x40); - if (node_count >> 31 != 0) { - edge_matrix_buffer_sz = (node_count | 7) / 8; - } else { - edge_matrix_buffer_sz = node_count / 8; + // Several calculations below align the sizes being calculated + // onto a 64 byte boundary. + static constexpr u64 buffer_alignment_size = 64; + + // Some calculations that calculate portions of the buffer + // that will contain information, on the other hand, align + // the result of some of their calcularions on a 16 byte boundary. + static constexpr u64 info_field_alignment_size = 16; + + // Maximum detail entries that may exist at one time for performance + // frame statistics. + static constexpr u64 max_perf_detail_entries = 100; + + // Size of the data structure representing the bulk of the voice-related state. + static constexpr u64 voice_state_size = 0x100; + + // Size of the upsampler manager data structure + constexpr u64 upsampler_manager_size = 0x48; + + // Calculates the part of the size that relates to mix buffers. + const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) { + // As of 8.0.0 this is the maximum on voice channels. + constexpr u64 max_voice_channels = 6; + + // The service expects the sample_count member of the parameters to either be + // a value of 160 or 240, so the maximum sample count is assumed in order + // to adequately handle all values at runtime. + constexpr u64 default_max_sample_count = 240; + + const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels; + + u64 size = 0; + size += total_mix_buffers * (sizeof(s32) * params.sample_count); + size += total_mix_buffers * (sizeof(s32) * default_max_sample_count); + size += u64{params.submix_count} + params.sink_count; + size = Common::AlignUp(size, buffer_alignment_size); + size += Common::AlignUp(params.unknown_30, buffer_alignment_size); + size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size); + return size; + }; + + // Calculates the portion of the size related to the mix data (and the sorting thereof). + const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { + // The size of the mixing info data structure. + constexpr u64 mix_info_size = 0x940; + + // Consists of total submixes with the final mix included. + const u64 total_mix_count = u64{params.submix_count} + 1; + + // The total number of effects that may be available to the audio renderer at any time. + constexpr u64 max_effects = 256; + + // Calculates the part of the size related to the audio node state. + // This will only be used if the audio revision supports the splitter. + const auto calculate_node_state_size = [](std::size_t num_nodes) { + // Internally within a nodestate, it appears to use a data structure + // similar to a std::bitset<64> twice. + constexpr u64 bit_size = Common::BitSize<u64>(); + constexpr u64 num_bitsets = 2; + + // Node state instances have three states internally for performing + // depth-first searches of nodes. Initialized, Found, and Done Sorting. + constexpr u64 num_states = 3; + + u64 size = 0; + size += (num_nodes * num_nodes) * sizeof(s32); + size += num_states * (num_nodes * sizeof(s32)); + size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>()); + return size; + }; + + // Calculates the part of the size related to the adjacency (aka edge) matrix. + const auto calculate_edge_matrix_size = [](std::size_t num_nodes) { + return (num_nodes * num_nodes) * sizeof(s32); + }; + + u64 size = 0; + size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size); + size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size); + size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count, + info_field_alignment_size); + + if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { + size += Common::AlignUp(calculate_node_state_size(total_mix_count) + + calculate_edge_matrix_size(total_mix_count), + info_field_alignment_size); } - buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10); - } - buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; - if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - buffer_sz += 0xE0 * params.num_splitter_send_channels; - buffer_sz += 0x20 * params.splitter_count; - buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10); - } - buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; - u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + - ((params.voice_count * 256) | 0x40); - - if (params.performance_frame_count >= 1) { - output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + - 16 * params.voice_count + 16) + - 0x658) * - (params.performance_frame_count + 1) + - 0xc0, - 0x40) + - output_sz; - } - output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000); + return size; + }; - IPC::ResponseBuilder rb{ctx, 4}; + // Calculates the part of the size related to voice channel info. + const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) { + constexpr u64 voice_info_size = 0x220; + constexpr u64 voice_resource_size = 0xD0; + + u64 size = 0; + size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size); + size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size); + size += + Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size); + size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size); + return size; + }; + + // Calculates the part of the size related to memory pools. + const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) { + const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count); + const u64 memory_pool_info_size = 0x20; + return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size); + }; + + // Calculates the part of the size related to the splitter context. + const auto calculate_splitter_context_size = + [this](const AudioCore::AudioRendererParameter& params) -> u64 { + if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { + return 0; + } + + constexpr u64 splitter_info_size = 0x20; + constexpr u64 splitter_destination_data_size = 0xE0; + + u64 size = 0; + size += params.num_splitter_send_channels; + size += + Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size); + size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels, + info_field_alignment_size); + + return size; + }; + + // Calculates the part of the size related to the upsampler info. + const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) { + constexpr u64 upsampler_info_size = 0x280; + // Yes, using the buffer size over info alignment size is intentional here. + return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count), + buffer_alignment_size); + }; + + // Calculates the part of the size related to effect info. + const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) { + constexpr u64 effect_info_size = 0x2B0; + return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size); + }; + + // Calculates the part of the size related to audio sink info. + const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) { + const u64 sink_info_size = 0x170; + return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size); + }; + + // Calculates the part of the size related to voice state info. + const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) { + const u64 voice_state_size = 0x100; + const u64 additional_size = buffer_alignment_size - 1; + return Common::AlignUp(voice_state_size * params.voice_count + additional_size, + info_field_alignment_size); + }; + + // Calculates the part of the size related to performance statistics. + const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { + // Extra size value appended to the end of the calculation. + constexpr u64 appended = 128; + + // Whether or not we assume the newer version of performance metrics data structures. + const bool is_v2 = + IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision); + + // Data structure sizes + constexpr u64 perf_statistics_size = 0x0C; + const u64 header_size = is_v2 ? 0x30 : 0x18; + const u64 entry_size = is_v2 ? 0x18 : 0x10; + const u64 detail_size = is_v2 ? 0x18 : 0x10; + + const u64 entry_count = CalculateNumPerformanceEntries(params); + const u64 size_per_frame = + header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries); + + u64 size = 0; + size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1, + buffer_alignment_size); + size += Common::AlignUp(perf_statistics_size, buffer_alignment_size); + size += appended; + return size; + }; + + // Calculates the part of the size that relates to the audio command buffer. + const auto calculate_command_buffer_size = + [this](const AudioCore::AudioRendererParameter& params) { + constexpr u64 alignment = (buffer_alignment_size - 1) * 2; + + if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { + constexpr u64 command_buffer_size = 0x18000; + + return command_buffer_size + alignment; + } + + // When the variadic command buffer is supported, this means + // the command generator for the audio renderer can issue commands + // that are (as one would expect), variable in size. So what we need to do + // is determine the maximum possible size for a few command data structures + // then multiply them by the amount of present commands indicated by the given + // respective audio parameters. + + constexpr u64 max_biquad_filters = 2; + constexpr u64 max_mix_buffers = 24; + + constexpr u64 biquad_filter_command_size = 0x2C; + + constexpr u64 depop_mix_command_size = 0x24; + constexpr u64 depop_setup_command_size = 0x50; + + constexpr u64 effect_command_max_size = 0x540; + + constexpr u64 mix_command_size = 0x1C; + constexpr u64 mix_ramp_command_size = 0x24; + constexpr u64 mix_ramp_grouped_command_size = 0x13C; + + constexpr u64 perf_command_size = 0x28; + + constexpr u64 sink_command_size = 0x130; + + constexpr u64 submix_command_max_size = + depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; + + constexpr u64 volume_command_size = 0x1C; + constexpr u64 volume_ramp_command_size = 0x20; + + constexpr u64 voice_biquad_filter_command_size = + biquad_filter_command_size * max_biquad_filters; + constexpr u64 voice_data_command_size = 0x9C; + const u64 voice_command_max_size = + (params.splitter_count * depop_setup_command_size) + + (voice_data_command_size + voice_biquad_filter_command_size + + volume_ramp_command_size + mix_ramp_grouped_command_size); + + // Now calculate the individual elements that comprise the size and add them together. + const u64 effect_commands_size = params.effect_count * effect_command_max_size; + + const u64 final_mix_commands_size = + depop_mix_command_size + volume_command_size * max_mix_buffers; + const u64 perf_commands_size = + perf_command_size * + (CalculateNumPerformanceEntries(params) + max_perf_detail_entries); + + const u64 sink_commands_size = params.sink_count * sink_command_size; + + const u64 splitter_commands_size = + params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; + + const u64 submix_commands_size = params.submix_count * submix_command_max_size; + + const u64 voice_commands_size = params.voice_count * voice_command_max_size; + + return effect_commands_size + final_mix_commands_size + perf_commands_size + + sink_commands_size + splitter_commands_size + submix_commands_size + + voice_commands_size + alignment; + }; + + IPC::RequestParser rp{ctx}; + const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); + + u64 size = 0; + size += calculate_mix_buffer_sizes(params); + size += calculate_mix_info_size(params); + size += calculate_voice_info_size(params); + size += upsampler_manager_size; + size += calculate_memory_pools_size(params); + size += calculate_splitter_context_size(params); + + size = Common::AlignUp(size, buffer_alignment_size); + + size += calculate_upsampler_info_size(params); + size += calculate_effect_info_size(params); + size += calculate_sink_info_size(params); + size += calculate_voice_state_size(params); + size += calculate_perf_size(params); + size += calculate_command_buffer_size(params); + + // finally, 4KB page align the size, and we're done. + size = Common::AlignUp(size, 4096); + + IPC::ResponseBuilder rb{ctx, 4}; rb.Push(RESULT_SUCCESS); - rb.Push<u64>(output_sz); + rb.Push<u64>(size); - LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz); + LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size); } void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { @@ -357,10 +598,15 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { } bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { - u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap + // Byte swap + const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); + switch (feature) { case AudioFeatures::Splitter: - return version_num >= 2u; + return version_num >= 2U; + case AudioFeatures::PerformanceMetricsVersion2: + case AudioFeatures::VariadicCommandBuffer: + return version_num >= 5U; default: return false; } diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index e55d25973..1d3c8df61 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -28,6 +28,8 @@ private: enum class AudioFeatures : u32 { Splitter, + PerformanceMetricsVersion2, + VariadicCommandBuffer, }; bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp index 59ef603e1..3c7ca2c44 100644 --- a/src/core/hle/service/btdrv/btdrv.cpp +++ b/src/core/hle/service/btdrv/btdrv.cpp @@ -34,8 +34,8 @@ public: RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - register_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, - "BT:RegisterEvent"); + register_event = Kernel::WritableEvent::CreateEventPair( + kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent"); } private: @@ -154,7 +154,8 @@ public: {96, nullptr, "GetLeHidEventInfo"}, {97, nullptr, "RegisterBleHidEvent"}, {98, nullptr, "SetLeScanParameter"}, - {256, nullptr, "GetIsManufacturingMode"} + {256, nullptr, "GetIsManufacturingMode"}, + {257, nullptr, "EmulateBluetoothCrash"}, }; // clang-format on diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp index 4f15c3f19..b439ee7ec 100644 --- a/src/core/hle/service/btm/btm.cpp +++ b/src/core/hle/service/btm/btm.cpp @@ -57,13 +57,13 @@ public: RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ScanEvent"); connection_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, "IBtmUserCore:ConnectionEvent"); + kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent"); service_discovery = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, "IBtmUserCore:Discovery"); - config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery"); + config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConfigEvent"); } diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp index ae7b0720b..907f464ab 100644 --- a/src/core/hle/service/caps/caps.cpp +++ b/src/core/hle/service/caps/caps.cpp @@ -15,32 +15,41 @@ public: explicit CAPS_A() : ServiceFramework{"caps:a"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "Unknown1"}, - {1, nullptr, "Unknown2"}, - {2, nullptr, "Unknown3"}, - {3, nullptr, "Unknown4"}, - {4, nullptr, "Unknown5"}, - {5, nullptr, "Unknown6"}, - {6, nullptr, "Unknown7"}, - {7, nullptr, "Unknown8"}, - {8, nullptr, "Unknown9"}, - {9, nullptr, "Unknown10"}, - {10, nullptr, "Unknown11"}, - {11, nullptr, "Unknown12"}, - {12, nullptr, "Unknown13"}, - {13, nullptr, "Unknown14"}, - {14, nullptr, "Unknown15"}, - {301, nullptr, "Unknown16"}, - {401, nullptr, "Unknown17"}, - {501, nullptr, "Unknown18"}, - {1001, nullptr, "Unknown19"}, - {1002, nullptr, "Unknown20"}, - {8001, nullptr, "Unknown21"}, - {8002, nullptr, "Unknown22"}, - {8011, nullptr, "Unknown23"}, - {8012, nullptr, "Unknown24"}, - {8021, nullptr, "Unknown25"}, - {10011, nullptr, "Unknown26"}, + {0, nullptr, "GetAlbumFileCount"}, + {1, nullptr, "GetAlbumFileList"}, + {2, nullptr, "LoadAlbumFile"}, + {3, nullptr, "DeleteAlbumFile"}, + {4, nullptr, "StorageCopyAlbumFile"}, + {5, nullptr, "IsAlbumMounted"}, + {6, nullptr, "GetAlbumUsage"}, + {7, nullptr, "GetAlbumFileSize"}, + {8, nullptr, "LoadAlbumFileThumbnail"}, + {9, nullptr, "LoadAlbumScreenShotImage"}, + {10, nullptr, "LoadAlbumScreenShotThumbnailImage"}, + {11, nullptr, "GetAlbumEntryFromApplicationAlbumEntry"}, + {12, nullptr, "Unknown12"}, + {13, nullptr, "Unknown13"}, + {14, nullptr, "Unknown14"}, + {15, nullptr, "Unknown15"}, + {16, nullptr, "Unknown16"}, + {17, nullptr, "Unknown17"}, + {18, nullptr, "Unknown18"}, + {202, nullptr, "SaveEditedScreenShot"}, + {301, nullptr, "GetLastThumbnail"}, + {401, nullptr, "GetAutoSavingStorage"}, + {501, nullptr, "GetRequiredStorageSpaceSizeToCopyAll"}, + {1001, nullptr, "Unknown1001"}, + {1002, nullptr, "Unknown1002"}, + {1003, nullptr, "Unknown1003"}, + {8001, nullptr, "ForceAlbumUnmounted"}, + {8002, nullptr, "ResetAlbumMountStatus"}, + {8011, nullptr, "RefreshAlbumCache"}, + {8012, nullptr, "GetAlbumCache"}, + {8013, nullptr, "Unknown8013"}, + {8021, nullptr, "GetAlbumEntryFromApplicationAlbumEntryAruid"}, + {10011, nullptr, "SetInternalErrorConversionEnabled"}, + {50000, nullptr, "Unknown50000"}, + {60002, nullptr, "Unknown60002"}, }; // clang-format on @@ -53,16 +62,17 @@ public: explicit CAPS_C() : ServiceFramework{"caps:c"} { // clang-format off static const FunctionInfo functions[] = { - {2001, nullptr, "Unknown1"}, - {2002, nullptr, "Unknown2"}, - {2011, nullptr, "Unknown3"}, - {2012, nullptr, "Unknown4"}, - {2013, nullptr, "Unknown5"}, - {2014, nullptr, "Unknown6"}, - {2101, nullptr, "Unknown7"}, - {2102, nullptr, "Unknown8"}, - {2201, nullptr, "Unknown9"}, - {2301, nullptr, "Unknown10"}, + {33, nullptr, "Unknown33"}, + {2001, nullptr, "Unknown2001"}, + {2002, nullptr, "Unknown2002"}, + {2011, nullptr, "Unknown2011"}, + {2012, nullptr, "Unknown2012"}, + {2013, nullptr, "Unknown2013"}, + {2014, nullptr, "Unknown2014"}, + {2101, nullptr, "Unknown2101"}, + {2102, nullptr, "Unknown2102"}, + {2201, nullptr, "Unknown2201"}, + {2301, nullptr, "Unknown2301"}, }; // clang-format on @@ -127,11 +137,18 @@ public: explicit CAPS_U() : ServiceFramework{"caps:u"} { // clang-format off static const FunctionInfo functions[] = { + {32, nullptr, "SetShimLibraryVersion"}, {102, nullptr, "GetAlbumFileListByAruid"}, {103, nullptr, "DeleteAlbumFileByAruid"}, {104, nullptr, "GetAlbumFileSizeByAruid"}, + {105, nullptr, "DeleteAlbumFileByAruidForDebug"}, {110, nullptr, "LoadAlbumScreenShotImageByAruid"}, {120, nullptr, "LoadAlbumScreenShotThumbnailImageByAruid"}, + {130, nullptr, "PrecheckToCreateContentsByAruid"}, + {140, nullptr, "GetAlbumFileList1AafeAruidDeprecated"}, + {141, nullptr, "GetAlbumFileList2AafeUidAruidDeprecated"}, + {142, nullptr, "GetAlbumFileList3AaeAruid"}, + {143, nullptr, "GetAlbumFileList4AaeUidAruid"}, {60002, nullptr, "OpenAccessorSessionForApplication"}, }; // clang-format on diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 0249b6992..e7df8fd98 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -664,10 +664,13 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {100, nullptr, "OpenImageDirectoryFileSystem"}, {110, nullptr, "OpenContentStorageFileSystem"}, {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"}, + {130, nullptr, "OpenCustomStorageFileSystem"}, {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"}, {201, nullptr, "OpenDataStorageByProgramId"}, {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"}, + {204, nullptr, "OpenDataFileSystemByProgramIndex"}, + {205, nullptr, "OpenDataStorageByProgramIndex"}, {400, nullptr, "OpenDeviceOperator"}, {500, nullptr, "OpenSdCardDetectionEventNotifier"}, {501, nullptr, "OpenGameCardDetectionEventNotifier"}, @@ -691,6 +694,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {614, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId"}, {615, nullptr, "QuerySaveDataInternalStorageTotalSize"}, {616, nullptr, "GetSaveDataCommitId"}, + {617, nullptr, "UnregisterExternalKey"}, {620, nullptr, "SetSdCardEncryptionSeed"}, {630, nullptr, "SetSdCardAccessibility"}, {631, nullptr, "IsSdCardAccessible"}, @@ -701,6 +705,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {710, nullptr, "ResolveAccessFailure"}, {720, nullptr, "AbandonAccessFailure"}, {800, nullptr, "GetAndClearFileSystemProxyErrorInfo"}, + {810, nullptr, "RegisterProgramIndexMapInfo"}, {1000, nullptr, "SetBisRootForHost"}, {1001, nullptr, "SetSaveDataSize"}, {1002, nullptr, "SetSaveDataRootPath"}, @@ -711,6 +716,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {1007, nullptr, "RegisterUpdatePartition"}, {1008, nullptr, "OpenRegisteredUpdatePartition"}, {1009, nullptr, "GetAndClearMemoryReportInfo"}, + {1010, nullptr, "SetDataStorageRedirectTarget"}, + {1011, nullptr, "OutputAccessLogToSdCard2"}, {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, {1200, nullptr, "OpenMultiCommitManager"}, diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index d9225d624..5100e376c 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -12,6 +12,7 @@ namespace Service::Friend { class IFriendService final : public ServiceFramework<IFriendService> { public: IFriendService() : ServiceFramework("IFriendService") { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetCompletionEvent"}, {1, nullptr, "Cancel"}, @@ -24,8 +25,7 @@ public: {10400, nullptr, "GetBlockedUserListIds"}, {10500, nullptr, "GetProfileList"}, {10600, nullptr, "DeclareOpenOnlinePlaySession"}, - {10601, &IFriendService::DeclareCloseOnlinePlaySession, - "DeclareCloseOnlinePlaySession"}, + {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, {10610, &IFriendService::UpdateUserPresence, "UpdateUserPresence"}, {10700, nullptr, "GetPlayHistoryRegistrationKey"}, {10701, nullptr, "GetPlayHistoryRegistrationKeyWithNetworkServiceAccountId"}, @@ -88,6 +88,7 @@ public: {30830, nullptr, "ClearPlayLog"}, {49900, nullptr, "DeleteNetworkServiceAccountCache"}, }; + // clang-format on RegisterHandlers(functions); } diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index e7fc7a619..fdd6d79a2 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -170,7 +170,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) { void Controller_NPad::OnInit() { auto& kernel = Core::System::GetInstance().Kernel(); styleset_changed_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, "npad:NpadStyleSetChanged"); + kernel, Kernel::ResetType::Automatic, "npad:NpadStyleSetChanged"); if (!IsControllerActivated()) { return; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 63b55758b..a4ad95d96 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -210,6 +210,7 @@ Hid::Hid() : ServiceFramework("hid") { {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, + {134, nullptr, "SetNpadAnalogStickUseCenterClamp"}, {200, &Hid::GetVibrationDeviceInfo, "GetVibrationDeviceInfo"}, {201, &Hid::SendVibrationValue, "SendVibrationValue"}, {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"}, @@ -221,6 +222,7 @@ Hid::Hid() : ServiceFramework("hid") { {208, nullptr, "GetActualVibrationGcErmCommand"}, {209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"}, {210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"}, + {211, nullptr, "IsVibrationDeviceMounted"}, {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"}, {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, {302, nullptr, "StopConsoleSixAxisSensor"}, @@ -265,6 +267,7 @@ Hid::Hid() : ServiceFramework("hid") { {523, nullptr, "SetIsPalmaPairedConnectable"}, {524, nullptr, "PairPalma"}, {525, &Hid::SetPalmaBoostMode, "SetPalmaBoostMode"}, + {526, nullptr, "CancelWritePalmaWaveEntry"}, {1000, nullptr, "SetNpadCommunicationMode"}, {1001, nullptr, "GetNpadCommunicationMode"}, }; @@ -797,12 +800,22 @@ public: {232, nullptr, "EnableShipmentMode"}, {233, nullptr, "ClearPairingInfo"}, {234, nullptr, "GetUniquePadDeviceTypeSetInternal"}, + {235, nullptr, "EnableAnalogStickPower"}, {301, nullptr, "GetAbstractedPadHandles"}, {302, nullptr, "GetAbstractedPadState"}, {303, nullptr, "GetAbstractedPadsState"}, {321, nullptr, "SetAutoPilotVirtualPadState"}, {322, nullptr, "UnsetAutoPilotVirtualPadState"}, {323, nullptr, "UnsetAllAutoPilotVirtualPadState"}, + {324, nullptr, "AttachHdlsWorkBuffer"}, + {325, nullptr, "ReleaseHdlsWorkBuffer"}, + {326, nullptr, "DumpHdlsNpadAssignmentState"}, + {327, nullptr, "DumpHdlsStates"}, + {328, nullptr, "ApplyHdlsNpadAssignmentState"}, + {329, nullptr, "ApplyHdlsStateList"}, + {330, nullptr, "AttachHdlsVirtualDevice"}, + {331, nullptr, "DetachHdlsVirtualDevice"}, + {332, nullptr, "SetHdlsState"}, {350, nullptr, "AddRegisteredDevice"}, {400, nullptr, "DisableExternalMcuOnNxDevice"}, {401, nullptr, "DisableRailDeviceFiltering"}, @@ -825,6 +838,7 @@ public: {131, nullptr, "ActivateSleepButton"}, {141, nullptr, "AcquireCaptureButtonEventHandle"}, {151, nullptr, "ActivateCaptureButton"}, + {161, nullptr, "GetPlatformConfig"}, {210, nullptr, "AcquireNfcDeviceUpdateEventHandle"}, {211, nullptr, "GetNpadsWithNfc"}, {212, nullptr, "AcquireNfcActivateEventHandle"}, @@ -894,6 +908,7 @@ public: {827, nullptr, "IsAnalogStickButtonPressed"}, {828, nullptr, "IsAnalogStickInReleasePosition"}, {829, nullptr, "IsAnalogStickInCircumference"}, + {830, nullptr, "SetNotificationLedPattern"}, {850, nullptr, "IsUsbFullKeyControllerEnabled"}, {851, nullptr, "EnableUsbFullKeyController"}, {852, nullptr, "IsUsbConnected"}, diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index e250595e3..ed5059047 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -52,9 +52,11 @@ public: } }; -class ILocalCommunicationService final : public ServiceFramework<ILocalCommunicationService> { +class ISystemLocalCommunicationService final + : public ServiceFramework<ISystemLocalCommunicationService> { public: - explicit ILocalCommunicationService(const char* name) : ServiceFramework{name} { + explicit ISystemLocalCommunicationService() + : ServiceFramework{"ISystemLocalCommunicationService"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetState"}, @@ -84,6 +86,50 @@ public: {304, nullptr, "Disconnect"}, {400, nullptr, "InitializeSystem"}, {401, nullptr, "FinalizeSystem"}, + {402, nullptr, "SetOperationMode"}, + {403, nullptr, "InitializeSystem2"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IUserLocalCommunicationService final + : public ServiceFramework<IUserLocalCommunicationService> { +public: + explicit IUserLocalCommunicationService() : ServiceFramework{"IUserLocalCommunicationService"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetState"}, + {1, nullptr, "GetNetworkInfo"}, + {2, nullptr, "GetIpv4Address"}, + {3, nullptr, "GetDisconnectReason"}, + {4, nullptr, "GetSecurityParameter"}, + {5, nullptr, "GetNetworkConfig"}, + {100, nullptr, "AttachStateChangeEvent"}, + {101, nullptr, "GetNetworkInfoLatestUpdate"}, + {102, nullptr, "Scan"}, + {103, nullptr, "ScanPrivate"}, + {104, nullptr, "SetWirelessControllerRestriction"}, + {200, nullptr, "OpenAccessPoint"}, + {201, nullptr, "CloseAccessPoint"}, + {202, nullptr, "CreateNetwork"}, + {203, nullptr, "CreateNetworkPrivate"}, + {204, nullptr, "DestroyNetwork"}, + {205, nullptr, "Reject"}, + {206, nullptr, "SetAdvertiseData"}, + {207, nullptr, "SetStationAcceptPolicy"}, + {208, nullptr, "AddAcceptFilterEntry"}, + {209, nullptr, "ClearAcceptFilter"}, + {300, nullptr, "OpenStation"}, + {301, nullptr, "CloseStation"}, + {302, nullptr, "Connect"}, + {303, nullptr, "ConnectPrivate"}, + {304, nullptr, "Disconnect"}, + {400, nullptr, "Initialize"}, + {401, nullptr, "Finalize"}, + {402, nullptr, "SetOperationMode"}, }; // clang-format on @@ -108,7 +154,7 @@ public: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILocalCommunicationService>("ISystemLocalCommunicationService"); + rb.PushIpcInterface<ISystemLocalCommunicationService>(); } }; @@ -129,7 +175,7 @@ public: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILocalCommunicationService>("IUserLocalCommunicationService"); + rb.PushIpcInterface<IUserLocalCommunicationService>(); } }; diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 609102f2c..5af925515 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -86,6 +86,7 @@ public: {2, &RelocatableObject::LoadNrr, "LoadNrr"}, {3, &RelocatableObject::UnloadNrr, "UnloadNrr"}, {4, &RelocatableObject::Initialize, "Initialize"}, + {10, nullptr, "LoadNrrEx"}, }; // clang-format on diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp index c6babdd4d..a5cb06f8a 100644 --- a/src/core/hle/service/nfp/nfp.cpp +++ b/src/core/hle/service/nfp/nfp.cpp @@ -26,7 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152); Module::Interface::Interface(std::shared_ptr<Module> module, const char* name) : ServiceFramework(name), module(std::move(module)) { auto& kernel = Core::System::GetInstance().Kernel(); - nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "IUser:NFCTagDetected"); } @@ -67,9 +67,9 @@ public: auto& kernel = Core::System::GetInstance().Kernel(); deactivate_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent"); + kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent"); availability_change_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent"); + kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent"); } private: diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 60479bb45..76b12b482 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -15,12 +15,16 @@ namespace Service::NIFM { class IScanRequest final : public ServiceFramework<IScanRequest> { public: explicit IScanRequest() : ServiceFramework("IScanRequest") { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "Submit"}, {1, nullptr, "IsProcessing"}, {2, nullptr, "GetResult"}, {3, nullptr, "GetSystemEventReadableHandle"}, + {4, nullptr, "SetChannels"}, }; + // clang-format on + RegisterHandlers(functions); } }; @@ -58,9 +62,9 @@ public: RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "IRequest:Event1"); - event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "IRequest:Event2"); } diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp index 0dabcd23b..f319a3ca1 100644 --- a/src/core/hle/service/nim/nim.cpp +++ b/src/core/hle/service/nim/nim.cpp @@ -141,7 +141,7 @@ public: auto& kernel = Core::System::GetInstance().Kernel(); finished_event = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::OneShot, + kernel, Kernel::ResetType::Automatic, "IEnsureNetworkClockAvailabilityService:FinishEvent"); } diff --git a/src/core/hle/service/npns/npns.cpp b/src/core/hle/service/npns/npns.cpp index ccb6f9da9..8751522ca 100644 --- a/src/core/hle/service/npns/npns.cpp +++ b/src/core/hle/service/npns/npns.cpp @@ -45,7 +45,7 @@ public: {114, nullptr, "AttachJid"}, {115, nullptr, "DetachJid"}, {201, nullptr, "RequestChangeStateForceTimed"}, - {102, nullptr, "RequestChangeStateForceAsync"}, + {202, nullptr, "RequestChangeStateForceAsync"}, }; // clang-format on @@ -73,6 +73,7 @@ public: {103, nullptr, "GetState"}, {104, nullptr, "GetStatistics"}, {111, nullptr, "GetJid"}, + {120, nullptr, "CreateNotificationReceiver"}, }; // clang-format on diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 3b9ab4b14..b60fc748b 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -129,7 +129,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); - query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot, + query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, "NVDRV::query_event"); } diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 4d150fc71..5731e815f 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -16,7 +16,7 @@ namespace Service::NVFlinger { BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) { auto& kernel = Core::System::GetInstance().Kernel(); - buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, + buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "BufferQueue NativeHandle"); } diff --git a/src/core/hle/service/pctl/module.cpp b/src/core/hle/service/pctl/module.cpp index 6081f41e1..c75b4ee34 100644 --- a/src/core/hle/service/pctl/module.cpp +++ b/src/core/hle/service/pctl/module.cpp @@ -12,10 +12,10 @@ namespace Service::PCTL { class IParentalControlService final : public ServiceFramework<IParentalControlService> { public: IParentalControlService() : ServiceFramework("IParentalControlService") { + // clang-format off static const FunctionInfo functions[] = { {1, &IParentalControlService::Initialize, "Initialize"}, - {1001, &IParentalControlService::CheckFreeCommunicationPermission, - "CheckFreeCommunicationPermission"}, + {1001, &IParentalControlService::CheckFreeCommunicationPermission, "CheckFreeCommunicationPermission"}, {1002, nullptr, "ConfirmLaunchApplicationPermission"}, {1003, nullptr, "ConfirmResumeApplicationPermission"}, {1004, nullptr, "ConfirmSnsPostPermission"}, @@ -30,6 +30,7 @@ public: {1013, nullptr, "ConfirmStereoVisionPermission"}, {1014, nullptr, "ConfirmPlayableApplicationVideoOld"}, {1015, nullptr, "ConfirmPlayableApplicationVideo"}, + {1016, nullptr, "ConfirmShowNewsPermission"}, {1031, nullptr, "IsRestrictionEnabled"}, {1032, nullptr, "GetSafetyLevel"}, {1033, nullptr, "SetSafetyLevel"}, @@ -45,6 +46,7 @@ public: {1045, nullptr, "UpdateFreeCommunicationApplicationList"}, {1046, nullptr, "DisableFeaturesForReset"}, {1047, nullptr, "NotifyApplicationDownloadStarted"}, + {1048, nullptr, "NotifyNetworkProfileCreated"}, {1061, nullptr, "ConfirmStereoVisionRestrictionConfigurable"}, {1062, nullptr, "GetStereoVisionRestriction"}, {1063, nullptr, "SetStereoVisionRestriction"}, @@ -63,6 +65,7 @@ public: {1411, nullptr, "GetPairingAccountInfo"}, {1421, nullptr, "GetAccountNickname"}, {1424, nullptr, "GetAccountState"}, + {1425, nullptr, "RequestPostEvents"}, {1432, nullptr, "GetSynchronizationEvent"}, {1451, nullptr, "StartPlayTimer"}, {1452, nullptr, "StopPlayTimer"}, diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index 6b27dc4a3..ebcc41a43 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp @@ -42,15 +42,18 @@ private: class DebugMonitor final : public ServiceFramework<DebugMonitor> { public: explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { + // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "IsDebugMode"}, - {1, nullptr, "GetDebugProcesses"}, - {2, nullptr, "StartDebugProcess"}, - {3, nullptr, "GetTitlePid"}, - {4, nullptr, "EnableDebugForTitleId"}, - {5, nullptr, "GetApplicationPid"}, - {6, nullptr, "EnableDebugForApplication"}, + {0, nullptr, "GetDebugProcesses"}, + {1, nullptr, "StartDebugProcess"}, + {2, nullptr, "GetTitlePid"}, + {3, nullptr, "EnableDebugForTitleId"}, + {4, nullptr, "GetApplicationPid"}, + {5, nullptr, "EnableDebugForApplication"}, + {6, nullptr, "DisableDebug"}, }; + // clang-format on + RegisterHandlers(functions); } }; @@ -68,6 +71,7 @@ public: class Shell final : public ServiceFramework<Shell> { public: explicit Shell() : ServiceFramework{"pm:shell"} { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "LaunchProcess"}, {1, nullptr, "TerminateProcessByPid"}, @@ -77,7 +81,10 @@ public: {5, nullptr, "NotifyBootFinished"}, {6, nullptr, "GetApplicationPid"}, {7, nullptr, "BoostSystemMemoryResourceLimit"}, + {8, nullptr, "EnableAdditionalSystemThreads"}, }; + // clang-format on + RegisterHandlers(functions); } }; diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index 1afc43f75..298d85011 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -2,16 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <chrono> #include "common/logging/log.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/client_port.h" -#include "core/hle/kernel/client_session.h" #include "core/hle/service/set/set.h" #include "core/settings.h" namespace Service::Set { - +namespace { constexpr std::array<LanguageCode, 17> available_language_codes = {{ LanguageCode::JA, LanguageCode::EN_US, @@ -32,41 +31,35 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{ LanguageCode::ZH_HANT, }}; -constexpr std::size_t pre4_0_0_max_entries = 0xF; -constexpr std::size_t post4_0_0_max_entries = 0x40; +constexpr std::size_t pre4_0_0_max_entries = 15; +constexpr std::size_t post4_0_0_max_entries = 17; constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625}; -LanguageCode GetLanguageCodeFromIndex(std::size_t index) { - return available_language_codes.at(index); +void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_language_codes) { + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(static_cast<u32>(num_language_codes)); } -template <std::size_t size> -static std::array<LanguageCode, size> MakeLanguageCodeSubset() { - std::array<LanguageCode, size> arr; - std::copy_n(available_language_codes.begin(), size, arr.begin()); - return arr; +void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) { + const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode); + const std::size_t copy_amount = std::min(requested_amount, max_size); + const std::size_t copy_size = copy_amount * sizeof(LanguageCode); + + ctx.WriteBuffer(available_language_codes.data(), copy_size); + PushResponseLanguageCode(ctx, copy_amount); } +} // Anonymous namespace -static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) { - IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(RESULT_SUCCESS); - if (available_language_codes.size() > max_size) { - rb.Push(static_cast<u32>(max_size)); - } else { - rb.Push(static_cast<u32>(available_language_codes.size())); - } +LanguageCode GetLanguageCodeFromIndex(std::size_t index) { + return available_language_codes.at(index); } void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_SET, "called"); - if (available_language_codes.size() > pre4_0_0_max_entries) { - ctx.WriteBuffer(MakeLanguageCodeSubset<pre4_0_0_max_entries>()); - } else { - ctx.WriteBuffer(available_language_codes); - } - PushResponseLanguageCode(ctx, pre4_0_0_max_entries); + GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries); } void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) { @@ -87,12 +80,7 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) { void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_SET, "called"); - if (available_language_codes.size() > post4_0_0_max_entries) { - ctx.WriteBuffer(MakeLanguageCodeSubset<post4_0_0_max_entries>()); - } else { - ctx.WriteBuffer(available_language_codes); - } - PushResponseLanguageCode(ctx, post4_0_0_max_entries); + GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries); } void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) { @@ -102,9 +90,9 @@ void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) { } void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) { - PushResponseLanguageCode(ctx, post4_0_0_max_entries); - LOG_DEBUG(Service_SET, "called"); + + PushResponseLanguageCode(ctx, post4_0_0_max_entries); } void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { @@ -116,6 +104,7 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { } SET::SET() : ServiceFramework("set") { + // clang-format off static const FunctionInfo functions[] = { {0, &SET::GetLanguageCode, "GetLanguageCode"}, {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, @@ -126,7 +115,10 @@ SET::SET() : ServiceFramework("set") { {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, {7, nullptr, "GetKeyCodeMap"}, {8, nullptr, "GetQuestFlag"}, + {9, nullptr, "GetKeyCodeMap2"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/set/set_cal.cpp b/src/core/hle/service/set/set_cal.cpp index 34654bb07..5981c575c 100644 --- a/src/core/hle/service/set/set_cal.cpp +++ b/src/core/hle/service/set/set_cal.cpp @@ -40,7 +40,7 @@ SET_CAL::SET_CAL() : ServiceFramework("set:cal") { {30, nullptr, "GetAmiiboEcqvBlsCertificate"}, {31, nullptr, "GetAmiiboEcqvBlsRootCertificate"}, {32, nullptr, "GetUsbTypeCPowerSourceCircuitVersion"}, - {33, nullptr, "GetBatteryVersion"}, + {41, nullptr, "GetBatteryVersion"}, }; RegisterHandlers(functions); } diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp index ecee554bf..98d0cfdfd 100644 --- a/src/core/hle/service/set/set_sys.cpp +++ b/src/core/hle/service/set/set_sys.cpp @@ -104,6 +104,7 @@ void SET_SYS::SetColorSetId(Kernel::HLERequestContext& ctx) { } SET_SYS::SET_SYS() : ServiceFramework("set:sys") { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "SetLanguageCode"}, {1, nullptr, "SetNetworkSettings"}, @@ -252,7 +253,33 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") { {147, nullptr, "GetConsoleSixAxisSensorAngularAcceleration"}, {148, nullptr, "SetConsoleSixAxisSensorAngularAcceleration"}, {149, nullptr, "GetRebootlessSystemUpdateVersion"}, + {150, nullptr, "GetDeviceTimeZoneLocationUpdatedTime"}, + {151, nullptr, "SetDeviceTimeZoneLocationUpdatedTime"}, + {152, nullptr, "GetUserSystemClockAutomaticCorrectionUpdatedTime"}, + {153, nullptr, "SetUserSystemClockAutomaticCorrectionUpdatedTime"}, + {154, nullptr, "GetAccountOnlineStorageSettings"}, + {155, nullptr, "SetAccountOnlineStorageSettings"}, + {156, nullptr, "GetPctlReadyFlag"}, + {157, nullptr, "SetPctlReadyFlag"}, + {162, nullptr, "GetPtmBatteryVersion"}, + {163, nullptr, "SetPtmBatteryVersion"}, + {164, nullptr, "GetUsb30HostEnableFlag"}, + {165, nullptr, "SetUsb30HostEnableFlag"}, + {166, nullptr, "GetUsb30DeviceEnableFlag"}, + {167, nullptr, "SetUsb30DeviceEnableFlag"}, + {168, nullptr, "GetThemeId"}, + {169, nullptr, "SetThemeId"}, + {170, nullptr, "GetChineseTraditionalInputMethod"}, + {171, nullptr, "SetChineseTraditionalInputMethod"}, + {172, nullptr, "GetPtmCycleCountReliability"}, + {173, nullptr, "SetPtmCycleCountReliability"}, + {175, nullptr, "GetThemeSettings"}, + {176, nullptr, "SetThemeSettings"}, + {177, nullptr, "GetThemeKey"}, + {178, nullptr, "SetThemeKey"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 4342f3b2d..884ad173b 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -73,6 +73,7 @@ void BSD::Close(Kernel::HLERequestContext& ctx) { } BSD::BSD(const char* name) : ServiceFramework(name) { + // clang-format off static const FunctionInfo functions[] = { {0, &BSD::RegisterClient, "RegisterClient"}, {1, &BSD::StartMonitoring, "StartMonitoring"}, @@ -105,7 +106,11 @@ BSD::BSD(const char* name) : ServiceFramework(name) { {28, nullptr, "GetResourceStatistics"}, {29, nullptr, "RecvMMsg"}, {30, nullptr, "SendMMsg"}, + {31, nullptr, "EventFd"}, + {32, nullptr, "RegisterResourceStatisticsName"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp index f7f87a958..65040c077 100644 --- a/src/core/hle/service/ssl/ssl.cpp +++ b/src/core/hle/service/ssl/ssl.cpp @@ -103,6 +103,8 @@ public: {4, nullptr, "DebugIoctl"}, {5, &SSL::SetInterfaceVersion, "SetInterfaceVersion"}, {6, nullptr, "FlushSessionCache"}, + {7, nullptr, "SetDebugOption"}, + {8, nullptr, "GetDebugOption"}, }; // clang-format on diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp index b3a196f65..8d122ae33 100644 --- a/src/core/hle/service/time/interface.cpp +++ b/src/core/hle/service/time/interface.cpp @@ -8,6 +8,7 @@ namespace Service::Time { Time::Time(std::shared_ptr<Module> time, const char* name) : Module::Interface(std::move(time), name) { + // clang-format off static const FunctionInfo functions[] = { {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"}, {1, &Time::GetStandardNetworkSystemClock, "GetStandardNetworkSystemClock"}, @@ -15,18 +16,23 @@ Time::Time(std::shared_ptr<Module> time, const char* name) {3, &Time::GetTimeZoneService, "GetTimeZoneService"}, {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"}, {5, nullptr, "GetEphemeralNetworkSystemClock"}, + {20, nullptr, "GetSharedMemoryNativeHandle"}, + {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"}, + {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"}, {50, nullptr, "SetStandardSteadyClockInternalOffset"}, {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"}, {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"}, {102, nullptr, "GetStandardUserSystemClockInitialYear"}, {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"}, + {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"}, {300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"}, {400, &Time::GetClockSnapshot, "GetClockSnapshot"}, {401, nullptr, "GetClockSnapshotFromSystemClockContext"}, - {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, - "CalculateStandardUserSystemClockDifferenceByUser"}, + {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, "CalculateStandardUserSystemClockDifferenceByUser"}, {501, nullptr, "CalculateSpanBetween"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index 01d80311b..a8d088305 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -17,7 +17,7 @@ namespace Service::VI { Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} { auto& kernel = Core::System::GetInstance().Kernel(); - vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, + vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, fmt::format("Display VSync Event {}", id)); } diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 07aa7a1cd..10b13fb1d 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -86,25 +86,29 @@ FileType AppLoader_DeconstructedRomDirectory::IdentifyType(const FileSys::Virtua return FileType::Error; } -ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) { +AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirectory::Load( + Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } if (dir == nullptr) { - if (file == nullptr) - return ResultStatus::ErrorNullFile; + if (file == nullptr) { + return {ResultStatus::ErrorNullFile, {}}; + } + dir = file->GetContainingDirectory(); } // Read meta to determine title ID FileSys::VirtualFile npdm = dir->GetFile("main.npdm"); - if (npdm == nullptr) - return ResultStatus::ErrorMissingNPDM; + if (npdm == nullptr) { + return {ResultStatus::ErrorMissingNPDM, {}}; + } - ResultStatus result = metadata.Load(npdm); + const ResultStatus result = metadata.Load(npdm); if (result != ResultStatus::Success) { - return result; + return {result, {}}; } if (override_update) { @@ -114,23 +118,24 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) // Reread in case PatchExeFS affected the main.npdm npdm = dir->GetFile("main.npdm"); - if (npdm == nullptr) - return ResultStatus::ErrorMissingNPDM; + if (npdm == nullptr) { + return {ResultStatus::ErrorMissingNPDM, {}}; + } - ResultStatus result2 = metadata.Load(npdm); + const ResultStatus result2 = metadata.Load(npdm); if (result2 != ResultStatus::Success) { - return result2; + return {result2, {}}; } metadata.Print(); const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { - return ResultStatus::Error32BitISA; + return {ResultStatus::Error32BitISA, {}}; } if (process.LoadFromMetadata(metadata).IsError()) { - return ResultStatus::ErrorUnableToParseKernelMetadata; + return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; } const FileSys::PatchManager pm(metadata.GetTitleID()); @@ -150,7 +155,7 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) const auto tentative_next_load_addr = AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm); if (!tentative_next_load_addr) { - return ResultStatus::ErrorLoadingNSO; + return {ResultStatus::ErrorLoadingNSO, {}}; } next_load_addr = *tentative_next_load_addr; @@ -159,8 +164,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false); } - process.Run(base_address, metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()); - // Find the RomFS by searching for a ".romfs" file in this directory const auto& files = dir->GetFiles(); const auto romfs_iter = @@ -175,7 +178,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) } is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, + LoadParameters{metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()}}; } ResultStatus AppLoader_DeconstructedRomDirectory::ReadRomFS(FileSys::VirtualFile& dir) { diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h index 1615cb5a8..1a65c16a4 100644 --- a/src/core/loader/deconstructed_rom_directory.h +++ b/src/core/loader/deconstructed_rom_directory.h @@ -37,7 +37,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; ResultStatus ReadIcon(std::vector<u8>& buffer) override; diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 46ac372f6..6d4b02375 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -382,13 +382,15 @@ FileType AppLoader_ELF::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_ELF::Load(Kernel::Process& process) { - if (is_loaded) - return ResultStatus::ErrorAlreadyLoaded; +AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) { + if (is_loaded) { + return {ResultStatus::ErrorAlreadyLoaded, {}}; + } std::vector<u8> buffer = file->ReadAllBytes(); - if (buffer.size() != file->GetSize()) - return ResultStatus::ErrorIncorrectELFFileSize; + if (buffer.size() != file->GetSize()) { + return {ResultStatus::ErrorIncorrectELFFileSize, {}}; + } const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); ElfReader elf_reader(&buffer[0]); @@ -396,10 +398,9 @@ ResultStatus AppLoader_ELF::Load(Kernel::Process& process) { const VAddr entry_point = codeset.entrypoint; process.LoadModule(std::move(codeset), entry_point); - process.Run(entry_point, 48, Memory::DEFAULT_STACK_SIZE); is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, LoadParameters{48, Memory::DEFAULT_STACK_SIZE}}; } } // namespace Loader diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index a2d33021c..7ef7770a6 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -26,7 +26,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; }; } // namespace Loader diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index bb925f4a6..f7846db52 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -131,6 +131,12 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status); /// Interface for loading an application class AppLoader : NonCopyable { public: + struct LoadParameters { + s32 main_thread_priority; + u64 main_thread_stack_size; + }; + using LoadResult = std::pair<ResultStatus, std::optional<LoadParameters>>; + explicit AppLoader(FileSys::VirtualFile file); virtual ~AppLoader(); @@ -145,7 +151,7 @@ public: * @param process The newly created process. * @return The status result of the operation. */ - virtual ResultStatus Load(Kernel::Process& process) = 0; + virtual LoadResult Load(Kernel::Process& process) = 0; /** * Loads the system mode that this application needs. diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp index 93a970d10..34efef09a 100644 --- a/src/core/loader/nax.cpp +++ b/src/core/loader/nax.cpp @@ -41,31 +41,37 @@ FileType AppLoader_NAX::GetFileType() const { return IdentifyTypeImpl(*nax); } -ResultStatus AppLoader_NAX::Load(Kernel::Process& process) { +AppLoader_NAX::LoadResult AppLoader_NAX::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (nax->GetStatus() != ResultStatus::Success) - return nax->GetStatus(); + const auto nax_status = nax->GetStatus(); + if (nax_status != ResultStatus::Success) { + return {nax_status, {}}; + } const auto nca = nax->AsNCA(); if (nca == nullptr) { - if (!Core::Crypto::KeyManager::KeyFileExists(false)) - return ResultStatus::ErrorMissingProductionKeyFile; - return ResultStatus::ErrorNAXInconvertibleToNCA; + if (!Core::Crypto::KeyManager::KeyFileExists(false)) { + return {ResultStatus::ErrorMissingProductionKeyFile, {}}; + } + + return {ResultStatus::ErrorNAXInconvertibleToNCA, {}}; } - if (nca->GetStatus() != ResultStatus::Success) - return nca->GetStatus(); + const auto nca_status = nca->GetStatus(); + if (nca_status != ResultStatus::Success) { + return {nca_status, {}}; + } const auto result = nca_loader->Load(process); - if (result != ResultStatus::Success) + if (result.first != ResultStatus::Success) { return result; + } is_loaded = true; - - return ResultStatus::Success; + return result; } ResultStatus AppLoader_NAX::ReadRomFS(FileSys::VirtualFile& dir) { diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h index f40079574..00f1659c1 100644 --- a/src/core/loader/nax.h +++ b/src/core/loader/nax.h @@ -33,7 +33,7 @@ public: FileType GetFileType() const override; - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp index ce8196fcf..b3f8f1083 100644 --- a/src/core/loader/nca.cpp +++ b/src/core/loader/nca.cpp @@ -30,36 +30,38 @@ FileType AppLoader_NCA::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_NCA::Load(Kernel::Process& process) { +AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } const auto result = nca->GetStatus(); if (result != ResultStatus::Success) { - return result; + return {result, {}}; } - if (nca->GetType() != FileSys::NCAContentType::Program) - return ResultStatus::ErrorNCANotProgram; + if (nca->GetType() != FileSys::NCAContentType::Program) { + return {ResultStatus::ErrorNCANotProgram, {}}; + } const auto exefs = nca->GetExeFS(); - - if (exefs == nullptr) - return ResultStatus::ErrorNoExeFS; + if (exefs == nullptr) { + return {ResultStatus::ErrorNoExeFS, {}}; + } directory_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(exefs, true); const auto load_result = directory_loader->Load(process); - if (load_result != ResultStatus::Success) + if (load_result.first != ResultStatus::Success) { return load_result; + } - if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) + if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) { Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); + } is_loaded = true; - - return ResultStatus::Success; + return load_result; } ResultStatus AppLoader_NCA::ReadRomFS(FileSys::VirtualFile& dir) { diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h index b9f077468..94f0ed677 100644 --- a/src/core/loader/nca.h +++ b/src/core/loader/nca.h @@ -33,7 +33,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 31e4a0c84..6a0ca389b 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -201,25 +201,25 @@ bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& fi return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base); } -ResultStatus AppLoader_NRO::Load(Kernel::Process& process) { +AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } // Load NRO const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); if (!LoadNro(process, *file, base_address)) { - return ResultStatus::ErrorLoadingNRO; + return {ResultStatus::ErrorLoadingNRO, {}}; } - if (romfs != nullptr) + if (romfs != nullptr) { Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); - - process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); + } is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, + LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; } ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) { diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 85b0ed644..1ffdae805 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h @@ -37,7 +37,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadIcon(std::vector<u8>& buffer) override; ResultStatus ReadProgramId(u64& out_program_id) override; diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index d7c47c197..8592b1f44 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -21,8 +21,6 @@ #include "core/memory.h" #include "core/settings.h" -#pragma optimize("", off) - namespace Loader { namespace { struct MODHeader { @@ -169,22 +167,21 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, return load_base + image_size; } -ResultStatus AppLoader_NSO::Load(Kernel::Process& process) { +AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } // Load module const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); if (!LoadModule(process, *file, base_address, true)) { - return ResultStatus::ErrorLoadingNSO; + return {ResultStatus::ErrorLoadingNSO, {}}; } LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address); - process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); - is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, + LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; } } // namespace Loader diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index 4674c3724..fdce9191c 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -84,7 +84,7 @@ public: VAddr load_base, bool should_pass_arguments, std::optional<FileSys::PatchManager> pm = {}); - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; }; } // namespace Loader diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp index 7da1f8960..ad56bbb38 100644 --- a/src/core/loader/nsp.cpp +++ b/src/core/loader/nsp.cpp @@ -72,37 +72,45 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_NSP::Load(Kernel::Process& process) { +AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (title_id == 0) - return ResultStatus::ErrorNSPMissingProgramNCA; + if (title_id == 0) { + return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; + } - if (nsp->GetStatus() != ResultStatus::Success) - return nsp->GetStatus(); + const auto nsp_status = nsp->GetStatus(); + if (nsp_status != ResultStatus::Success) { + return {nsp_status, {}}; + } - if (nsp->GetProgramStatus(title_id) != ResultStatus::Success) - return nsp->GetProgramStatus(title_id); + const auto nsp_program_status = nsp->GetProgramStatus(title_id); + if (nsp_program_status != ResultStatus::Success) { + return {nsp_program_status, {}}; + } if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) { - if (!Core::Crypto::KeyManager::KeyFileExists(false)) - return ResultStatus::ErrorMissingProductionKeyFile; - return ResultStatus::ErrorNSPMissingProgramNCA; + if (!Core::Crypto::KeyManager::KeyFileExists(false)) { + return {ResultStatus::ErrorMissingProductionKeyFile, {}}; + } + + return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; } const auto result = secondary_loader->Load(process); - if (result != ResultStatus::Success) + if (result.first != ResultStatus::Success) { return result; + } FileSys::VirtualFile update_raw; - if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) + if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) { Service::FileSystem::SetPackedUpdate(std::move(update_raw)); + } is_loaded = true; - - return ResultStatus::Success; + return result; } ResultStatus AppLoader_NSP::ReadRomFS(FileSys::VirtualFile& file) { diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h index 953a1b508..85e870bdf 100644 --- a/src/core/loader/nsp.h +++ b/src/core/loader/nsp.h @@ -35,7 +35,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp index 89f7bbf77..1e285a053 100644 --- a/src/core/loader/xci.cpp +++ b/src/core/loader/xci.cpp @@ -48,31 +48,35 @@ FileType AppLoader_XCI::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_XCI::Load(Kernel::Process& process) { +AppLoader_XCI::LoadResult AppLoader_XCI::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (xci->GetStatus() != ResultStatus::Success) - return xci->GetStatus(); + if (xci->GetStatus() != ResultStatus::Success) { + return {xci->GetStatus(), {}}; + } - if (xci->GetProgramNCAStatus() != ResultStatus::Success) - return xci->GetProgramNCAStatus(); + if (xci->GetProgramNCAStatus() != ResultStatus::Success) { + return {xci->GetProgramNCAStatus(), {}}; + } - if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) - return ResultStatus::ErrorMissingProductionKeyFile; + if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) { + return {ResultStatus::ErrorMissingProductionKeyFile, {}}; + } const auto result = nca_loader->Load(process); - if (result != ResultStatus::Success) + if (result.first != ResultStatus::Success) { return result; + } FileSys::VirtualFile update_raw; - if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) + if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) { Service::FileSystem::SetPackedUpdate(std::move(update_raw)); + } is_loaded = true; - - return ResultStatus::Success; + return result; } ResultStatus AppLoader_XCI::ReadRomFS(FileSys::VirtualFile& file) { diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h index 436f7387c..ae7145b14 100644 --- a/src/core/loader/xci.h +++ b/src/core/loader/xci.h @@ -35,7 +35,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 4e0538bc2..f18f6226b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -26,16 +26,16 @@ namespace Memory { static Common::PageTable* current_page_table = nullptr; -void SetCurrentPageTable(Common::PageTable* page_table) { - current_page_table = page_table; +void SetCurrentPageTable(Kernel::Process& process) { + current_page_table = &process.VMManager().page_table; + + const std::size_t address_space_width = process.VMManager().GetAddressSpaceWidth(); auto& system = Core::System::GetInstance(); - if (system.IsPoweredOn()) { - system.ArmInterface(0).PageTableChanged(); - system.ArmInterface(1).PageTableChanged(); - system.ArmInterface(2).PageTableChanged(); - system.ArmInterface(3).PageTableChanged(); - } + system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); } static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, diff --git a/src/core/memory.h b/src/core/memory.h index 6845f5fe1..04e2c5f1d 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -40,8 +40,9 @@ enum : VAddr { KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, }; -/// Changes the currently active page table. -void SetCurrentPageTable(Common::PageTable* page_table); +/// Changes the currently active page table to that of +/// the given process instance. +void SetCurrentPageTable(Kernel::Process& process); /// Determines if the given VAddr is valid for the specified process. bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); @@ -71,15 +72,6 @@ u8* GetPointer(VAddr vaddr); std::string ReadCString(VAddr vaddr, std::size_t max_length); -enum class FlushMode { - /// Write back modified surfaces to RAM - Flush, - /// Remove region from the cache - Invalidate, - /// Write back modified surfaces to RAM, and also remove them from the cache - FlushAndInvalidate, -}; - /** * Mark each page touching the region as cached. */ diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 6d32ebea3..c1365879b 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -90,6 +90,7 @@ void LogSettings() { LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); + LogSetting("Renderer_UseCompatibilityProfile", Settings::values.use_compatibility_profile); LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); LogSetting("Renderer_UseAsynchronousGpuEmulation", diff --git a/src/core/settings.h b/src/core/settings.h index b84390745..5ff3634aa 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -390,6 +390,7 @@ struct Values { float resolution_factor; bool use_frame_limit; u16 frame_limit; + bool use_compatibility_profile; bool use_disk_shader_cache; bool use_accurate_gpu_emulation; bool use_asynchronous_gpu_emulation; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index e1db06811..4b17bada5 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) { } TelemetrySession::TelemetrySession() { -#ifdef ENABLE_WEB_SERVICE - backend = std::make_unique<WebService::TelemetryJson>( - Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token); -#else - backend = std::make_unique<Telemetry::NullVisitor>(); -#endif // Log one-time top-level information AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId()); @@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() { .count()}; AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time); +#ifdef ENABLE_WEB_SERVICE + auto backend = std::make_unique<WebService::TelemetryJson>( + Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token); +#else + auto backend = std::make_unique<Telemetry::NullVisitor>(); +#endif + // Complete the session, submitting to web service if necessary - // This is just a placeholder to wrap up the session once the core completes and this is - // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service. field_collection.Accept(*backend); if (Settings::values.enable_telemetry) backend->Complete(); @@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() { bool TelemetrySession::SubmitTestcase() { #ifdef ENABLE_WEB_SERVICE + auto backend = std::make_unique<WebService::TelemetryJson>( + Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token); field_collection.Accept(*backend); return backend->SubmitTestcase(); #else diff --git a/src/core/telemetry_session.h b/src/core/telemetry_session.h index 023612b79..cae5a45a0 100644 --- a/src/core/telemetry_session.h +++ b/src/core/telemetry_session.h @@ -39,7 +39,6 @@ public: private: Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session - std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields }; /** diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 114bed20d..1e010e4da 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,8 @@ add_library(video_core STATIC dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h + engines/engine_upload.cpp + engines/engine_upload.h engines/fermi_2d.cpp engines/fermi_2d.h engines/kepler_compute.cpp @@ -36,6 +38,8 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_device.cpp + renderer_opengl/gl_device.h renderer_opengl/gl_global_cache.cpp renderer_opengl/gl_global_cache.h renderer_opengl/gl_primitive_assembler.cpp @@ -46,6 +50,8 @@ add_library(video_core STATIC renderer_opengl/gl_rasterizer_cache.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h + renderer_opengl/gl_sampler_cache.cpp + renderer_opengl/gl_sampler_cache.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp @@ -67,6 +73,8 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/utils.cpp renderer_opengl/utils.h + sampler_cache.cpp + sampler_cache.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 046d047cb..3175579cc 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -40,6 +40,13 @@ bool DmaPusher::Step() { } const CommandList& command_list{dma_pushbuffer.front()}; + ASSERT_OR_EXECUTE(!command_list.empty(), { + // Somehow the command_list is empty, in order to avoid a crash + // We ignore it and assume its size is 0. + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + return true; + }); const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; GPUVAddr dma_get = command_list_header.addr; GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); @@ -57,8 +64,8 @@ bool DmaPusher::Step() { // Push buffer non-empty, read a word command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); for (const CommandHeader& command_header : command_headers) { @@ -105,6 +112,8 @@ bool DmaPusher::Step() { dma_state.non_incrementing = false; dma_increment_once = true; break; + default: + break; } } } diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp new file mode 100644 index 000000000..082a40cd9 --- /dev/null +++ b/src/video_core/engines/engine_upload.cpp @@ -0,0 +1,52 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> + +#include "common/assert.h" +#include "video_core/engines/engine_upload.h" +#include "video_core/memory_manager.h" +#include "video_core/textures/decoders.h" + +namespace Tegra::Engines::Upload { + +State::State(MemoryManager& memory_manager, Registers& regs) + : regs{regs}, memory_manager{memory_manager} {} + +State::~State() = default; + +void State::ProcessExec(const bool is_linear) { + write_offset = 0; + copy_size = regs.line_length_in * regs.line_count; + inner_buffer.resize(copy_size); + this->is_linear = is_linear; +} + +void State::ProcessData(const u32 data, const bool is_last_call) { + const u32 sub_copy_size = std::min(4U, copy_size - write_offset); + std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); + write_offset += sub_copy_size; + if (!is_last_call) { + return; + } + const GPUVAddr address{regs.dest.Address()}; + if (is_linear) { + memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); + } else { + UNIMPLEMENTED_IF(regs.dest.z != 0); + UNIMPLEMENTED_IF(regs.dest.depth != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + const std::size_t dst_size = Tegra::Texture::CalculateSize( + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + tmp_buffer.resize(dst_size); + memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); + Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, + regs.dest.BlockHeight(), copy_size, inner_buffer.data(), + tmp_buffer.data()); + memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + } +} + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h new file mode 100644 index 000000000..ef4f5839a --- /dev/null +++ b/src/video_core/engines/engine_upload.h @@ -0,0 +1,73 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines::Upload { + +struct Registers { + u32 line_length_in; + u32 line_count; + + struct { + u32 address_high; + u32 address_low; + u32 pitch; + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 width; + u32 height; + u32 depth; + u32 z; + u32 x; + u32 y; + + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); + } + + u32 BlockWidth() const { + return 1U << block_width.Value(); + } + + u32 BlockHeight() const { + return 1U << block_height.Value(); + } + + u32 BlockDepth() const { + return 1U << block_depth.Value(); + } + } dest; +}; + +class State { +public: + State(MemoryManager& memory_manager, Registers& regs); + ~State(); + + void ProcessExec(bool is_linear); + void ProcessData(u32 data, bool is_last_call); + +private: + u32 write_offset = 0; + u32 copy_size = 0; + std::vector<u8> inner_buffer; + std::vector<u8> tmp_buffer; + bool is_linear = false; + Registers& regs; + MemoryManager& memory_manager; +}; + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2e51b7f13..45f59a4d9 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -21,6 +21,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as G80_2D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h + */ + #define FERMI2D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index b1d950460..7404a8163 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -4,12 +4,21 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h" namespace Tegra::Engines { -KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} +KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager) + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ + memory_manager, + regs.upload} {} KeplerCompute::~KeplerCompute() = default; @@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { regs.reg_array[method_call.method] = method_call.argument; switch (method_call.method) { + case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + upload_state.ProcessExec(regs.exec_upload.linear != 0); + break; + } + case KEPLER_COMPUTE_REG_INDEX(data_upload): { + const bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } + break; + } case KEPLER_COMPUTE_REG_INDEX(launch): - // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA - // kernels) - UNREACHABLE_MSG("Compute shaders are not implemented"); + ProcessLaunch(); break; default: break; } } +void KeplerCompute::ProcessLaunch() { + + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, + LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); + + const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; + LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fb6cdf432..5250b8d9b 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -6,22 +6,40 @@ #include <array> #include <cstddef> +#include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" +namespace Core { +class System; +} + namespace Tegra { class MemoryManager; } +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines { +/** + * This Engine is known as GK104_Compute. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h + */ + #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) class KeplerCompute final { public: - explicit KeplerCompute(MemoryManager& memory_manager); + explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager); ~KeplerCompute(); static constexpr std::size_t NumConstBuffers = 8; @@ -31,30 +49,181 @@ public: union { struct { - INSERT_PADDING_WORDS(0xAF); + INSERT_PADDING_WORDS(0x60); + + Upload::Registers upload; + + struct { + union { + BitField<0, 1, u32> linear; + }; + } exec_upload; + + u32 data_upload; + + INSERT_PADDING_WORDS(0x3F); + + struct { + u32 address; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); + } + } launch_desc_loc; + + INSERT_PADDING_WORDS(0x1); u32 launch; - INSERT_PADDING_WORDS(0xC48); + INSERT_PADDING_WORDS(0x4A7); + + struct { + u32 address_high; + u32 address_low; + u32 limit; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + } tsc; + + INSERT_PADDING_WORDS(0x3); + + struct { + u32 address_high; + u32 address_low; + u32 limit; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + } tic; + + INSERT_PADDING_WORDS(0x22); + + struct { + u32 address_high; + u32 address_low; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + } code_loc; + + INSERT_PADDING_WORDS(0x3FE); + + u32 texture_const_buffer_index; + + INSERT_PADDING_WORDS(0x374); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; + + struct LaunchParams { + static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; + + INSERT_PADDING_WORDS(0x8); + + u32 program_start; + + INSERT_PADDING_WORDS(0x2); + + BitField<30, 1, u32> linked_tsc; + + BitField<0, 31, u32> grid_dim_x; + union { + BitField<0, 16, u32> grid_dim_y; + BitField<16, 16, u32> grid_dim_z; + }; + + INSERT_PADDING_WORDS(0x3); + + BitField<0, 16, u32> shared_alloc; + + BitField<0, 31, u32> block_dim_x; + union { + BitField<0, 16, u32> block_dim_y; + BitField<16, 16, u32> block_dim_z; + }; + + union { + BitField<0, 8, u32> const_buffer_enable_mask; + BitField<29, 2, u32> cache_layout; + } memory_config; + + INSERT_PADDING_WORDS(0x8); + + struct { + u32 address_low; + union { + BitField<0, 8, u32> address_high; + BitField<15, 17, u32> size; + }; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | + address_low); + } + } const_buffer_config[8]; + + union { + BitField<0, 20, u32> local_pos_alloc; + BitField<27, 5, u32> barrier_alloc; + }; + + union { + BitField<0, 20, u32> local_neg_alloc; + BitField<24, 5, u32> gpr_alloc; + }; + + INSERT_PADDING_WORDS(0x11); + } launch_description; + + struct { + u32 write_offset = 0; + u32 copy_size = 0; + std::vector<u8> inner_buffer; + } state{}; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "KeplerCompute Regs has wrong size"); + static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), + "KeplerCompute LaunchParams has wrong size"); + /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); private: + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; + Upload::State upload_state; + + void ProcessLaunch(); }; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \ + static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D); ASSERT_REG_POSITION(launch, 0xAF); +ASSERT_REG_POSITION(tsc, 0x557); +ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(code_loc, 0x582); +ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); +ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); +ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); +ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); +ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); +ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); +ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index cd51a31d7..0561f676c 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -10,12 +10,12 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h" namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} +KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) + : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; @@ -27,30 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { switch (method_call.method) { case KEPLERMEMORY_REG_INDEX(exec): { - state.write_offset = 0; + upload_state.ProcessExec(regs.exec.linear != 0); break; } case KEPLERMEMORY_REG_INDEX(data): { - ProcessData(method_call.argument); + const bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } break; } } } -void KeplerMemory::ProcessData(u32 data) { - ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); - ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); - - // We have to invalidate the destination region to evict any outdated surfaces from the cache. - // We do this before actually writing the new data because the destination address might - // contain a dirty surface that will have to be written back to memory. - const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; - rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); - memory_manager.Write<u32>(address, data); - - system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); - - state.write_offset++; -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 78b6c3e45..f3bc675a9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -6,9 +6,11 @@ #include <array> #include <cstddef> +#include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" namespace Core { @@ -19,19 +21,20 @@ namespace Tegra { class MemoryManager; } -namespace VideoCore { -class RasterizerInterface; -} - namespace Tegra::Engines { +/** + * This Engine is known as P2MF. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h + */ + #define KEPLERMEMORY_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) class KeplerMemory final { public: - KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + KeplerMemory(Core::System& system, MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. @@ -44,26 +47,7 @@ public: struct { INSERT_PADDING_WORDS(0x60); - u32 line_length_in; - u32 line_count; - - struct { - u32 address_high; - u32 address_low; - u32 pitch; - u32 block_dimensions; - u32 width; - u32 height; - u32 depth; - u32 z; - u32 x; - u32 y; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - } dest; + Upload::Registers upload; struct { union { @@ -79,25 +63,17 @@ public: }; } regs{}; - struct { - u32 write_offset = 0; - } state{}; - private: Core::System& system; - VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; - - void ProcessData(u32 data); + Upload::State upload_state; }; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(line_length_in, 0x60); -ASSERT_REG_POSITION(line_count, 0x61); -ASSERT_REG_POSITION(dest, 0x62); +ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec, 0x6C); ASSERT_REG_POSITION(data, 0x6D); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74403eed4..39968d403 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ - *this} { + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, + macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { InitializeRegisterDefaults(); } @@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() { // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is // needed for ARMS. - for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) { - regs.viewports[viewport].depth_range_near = 0.0f; - regs.viewports[viewport].depth_range_far = 1.0f; + for (auto& viewport : regs.viewports) { + viewport.depth_range_near = 0.0f; + viewport.depth_range_far = 1.0f; } // Doom and Bomberman seems to use the uninitialized registers and just enable blend @@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.blend.equation_a = Regs::Blend::Equation::Add; regs.blend.factor_source_a = Regs::Blend::Factor::One; regs.blend.factor_dest_a = Regs::Blend::Factor::Zero; - for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) { - regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add; - regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One; - regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero; - regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add; - regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One; - regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero; + for (auto& blend : regs.independent_blend) { + blend.equation_rgb = Regs::Blend::Equation::Add; + blend.factor_source_rgb = Regs::Blend::Factor::One; + blend.factor_dest_rgb = Regs::Blend::Factor::Zero; + blend.equation_a = Regs::Blend::Equation::Add; + blend.factor_source_a = Regs::Blend::Factor::One; + blend.factor_dest_a = Regs::Blend::Factor::Zero; } regs.stencil_front_op_fail = Regs::StencilOp::Keep; regs.stencil_front_op_zfail = Regs::StencilOp::Keep; @@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() { // TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a // default of enabled fixes rendering here. - for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) { - regs.color_mask[color_mask].R.Assign(1); - regs.color_mask[color_mask].G.Assign(1); - regs.color_mask[color_mask].B.Assign(1); - regs.color_mask[color_mask].A.Assign(1); + for (auto& color_mask : regs.color_mask) { + color_mask.R.Assign(1); + color_mask.G.Assign(1); + color_mask.B.Assign(1); + color_mask.A.Assign(1); } // Commercial games seem to assume this value is enabled and nouveau sets this value manually. @@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { // Vertex buffer if (method >= MAXWELL3D_REG_INDEX(vertex_array) && - method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { + method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && - method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { + method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && - method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { + method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); } } @@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessSyncPoint(); break; } + case MAXWELL3D_REG_INDEX(exec_upload): { + upload_state.ProcessExec(regs.exec_upload.linear != 0); + break; + } + case MAXWELL3D_REG_INDEX(data_upload): { + const bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + dirty_flags.OnMemoryWrite(); + } + break; + } default: break; } @@ -418,7 +430,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; - memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || tic_entry.header_version == Texture::TICHeaderVersion::Pitch, @@ -430,7 +442,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { const auto a_type = tic_entry.a_type.Value(); // TODO(Subv): Different data types for separate components are not supported - ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); + DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); return tic_entry; } @@ -439,7 +451,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; - memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); + memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } @@ -482,19 +494,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, - std::size_t offset) const { - auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; - auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const { Texture::FullTextureInfo tex_info{}; tex_info.index = static_cast<u32>(offset); @@ -511,6 +512,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, return tex_info; } +Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, + std::size_t offset) const { + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; + ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); + + const GPUVAddr tex_info_address = + tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); + + ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + + return GetTextureInfo(tex_handle, offset); +} + u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; @@ -524,4 +541,12 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } +u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { + const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 321af3297..48e4fec33 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include <array> #include <bitset> +#include <type_traits> #include <unordered_map> #include <vector> @@ -14,6 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" #include "video_core/textures/texture.h" @@ -32,6 +34,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GF100_3D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h + */ + #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) @@ -243,9 +251,10 @@ public: return "10_10_10_2"; case Size::Size_11_11_10: return "11_11_10"; + default: + UNREACHABLE(); + return {}; } - UNREACHABLE(); - return {}; } std::string TypeString() const { @@ -579,7 +588,18 @@ public: u32 bind; } macros; - INSERT_PADDING_WORDS(0x69); + INSERT_PADDING_WORDS(0x17); + + Upload::Registers upload; + struct { + union { + BitField<0, 1, u32> linear; + }; + } exec_upload; + + u32 data_upload; + + INSERT_PADDING_WORDS(0x44); struct { union { @@ -1088,6 +1108,7 @@ public: } regs{}; static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); + static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); struct State { struct ConstBufferInfo { @@ -1131,12 +1152,18 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + /// Given a Texture Handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const; + /// Returns a list of enabled textures for the specified shader stage. std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array<u32, 0x40000>; @@ -1169,6 +1196,8 @@ private: /// Interpreter for the macro codes uploaded to the GPU. MacroInterpreter macro_interpreter; + Upload::State upload_state; + /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1212,6 +1241,9 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(macros, 0x45); +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D); ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tfb_enabled, 0x1D1); ASSERT_REG_POSITION(rt, 0x200); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2426d0067..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() { ASSERT(regs.exec.enable_2d == 1); - const std::size_t copy_size = regs.x_count * regs.y_count; + if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { + ASSERT(regs.src_params.size_z == 1); + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const std::size_t src_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, + regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); - auto source_ptr{memory_manager.GetPointer(source)}; - auto dst_ptr{memory_manager.GetPointer(dest)}; + const std::size_t dst_size = regs.dst_pitch * regs.y_count; - if (!source_ptr) { - LOG_ERROR(HW_GPU, "source_ptr is invalid"); - return; - } + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } - if (!dst_ptr) { - LOG_ERROR(HW_GPU, "dst_ptr is invalid"); - return; - } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { - // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated - // copying. - rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - // We have to invalidate the destination region to evict any outdated surfaces from the - // cache. We do this before actually writing the new data because the destination address - // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); - }; + Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, + regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), + write_buffer.data(), regs.src_params.BlockHeight(), + regs.src_params.pos_x, regs.src_params.pos_y); - if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { - ASSERT(regs.src_params.size_z == 1); - // If the input is tiled and the output is linear, deswizzle the input and copy it over. + memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); + } else { + ASSERT(regs.dst_params.BlockDepth() == 1); - const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; - FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, - copy_size * src_bytes_per_pixel); + const std::size_t dst_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, + regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); - Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, - regs.src_params.BlockHeight(), regs.src_params.pos_x, - regs.src_params.pos_y); - } else { - ASSERT(regs.dst_params.size_z == 1); - ASSERT(regs.src_pitch == regs.x_count); + const std::size_t dst_layer_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, + regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); - const u32 src_bpp = regs.src_pitch / regs.x_count; + const std::size_t src_size = regs.src_pitch * regs.y_count; - FlushAndInvalidate(regs.src_pitch * regs.y_count, - regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } + + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); // If the input is linear and the output is tiled, swizzle the input and copy it over. Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, - src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); + src_bytes_per_pixel, + write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, + read_buffer.data(), regs.dst_params.BlockHeight()); + + memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c6b649842..e5942f671 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -25,6 +26,11 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GK104_Copy. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml + */ + class MaxwellDMA final { public: explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, @@ -63,6 +69,16 @@ public: static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); + enum class ComponentMode : u32 { + Src0 = 0, + Src1 = 1, + Src2 = 2, + Src3 = 3, + Const0 = 4, + Const1 = 5, + Zero = 6, + }; + enum class CopyMode : u32 { None = 0, Unk1 = 1, @@ -128,7 +144,26 @@ public: u32 x_count; u32 y_count; - INSERT_PADDING_WORDS(0xBB); + INSERT_PADDING_WORDS(0xB8); + + u32 const0; + u32 const1; + union { + BitField<0, 4, ComponentMode> component0; + BitField<4, 4, ComponentMode> component1; + BitField<8, 4, ComponentMode> component2; + BitField<12, 4, ComponentMode> component3; + BitField<16, 2, u32> component_size; + BitField<20, 3, u32> src_num_components; + BitField<24, 3, u32> dst_num_components; + + u32 SrcBytePerPixel() const { + return src_num_components.Value() * component_size.Value(); + } + u32 DstBytePerPixel() const { + return dst_num_components.Value() * component_size.Value(); + } + } swizzle_config; Parameters dst_params; @@ -149,6 +184,9 @@ private: MemoryManager& memory_manager; + std::vector<u8> read_buffer; + std::vector<u8> write_buffer; + /// Performs the copy from the source buffer to the destination buffer as configured in the /// registers. void HandleCopy(); @@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104); ASSERT_REG_POSITION(dst_pitch, 0x105); ASSERT_REG_POSITION(x_count, 0x106); ASSERT_REG_POSITION(y_count, 0x107); +ASSERT_REG_POSITION(const0, 0x1C0); +ASSERT_REG_POSITION(const1, 0x1C1); +ASSERT_REG_POSITION(swizzle_config, 0x1C2); ASSERT_REG_POSITION(dst_params, 0x1C3); ASSERT_REG_POSITION(src_params, 0x1CA); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 2e1e96c81..e5b4eadea 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -387,6 +387,20 @@ enum class IpaSampleMode : u64 { Offset = 2, }; +enum class LmemLoadCacheManagement : u64 { + Default = 0, + LU = 1, + CI = 2, + CV = 3, +}; + +enum class LmemStoreCacheManagement : u64 { + Default = 0, + CG = 1, + CS = 2, + WT = 3, +}; + struct IpaMode { IpaInterpMode interpolation_mode; IpaSampleMode sampling_mode; @@ -782,7 +796,7 @@ union Instruction { } ld_l; union { - BitField<44, 2, u64> unknown; + BitField<44, 2, LmemStoreCacheManagement> cache_management; } st_l; union { @@ -792,6 +806,12 @@ union Instruction { } ldg; union { + BitField<48, 3, UniformType> type; + BitField<46, 2, u64> cache_mode; + BitField<20, 24, s64> immediate_offset; + } stg; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -917,21 +937,34 @@ union Instruction { } iset; union { - BitField<8, 2, Register::Size> dest_size; - BitField<10, 2, Register::Size> src_size; - BitField<12, 1, u64> is_output_signed; - BitField<13, 1, u64> is_input_signed; - BitField<41, 2, u64> selector; + BitField<41, 2, u64> selector; // i2i and i2f only BitField<45, 1, u64> negate_a; BitField<49, 1, u64> abs_a; + BitField<10, 2, Register::Size> src_size; + BitField<13, 1, u64> is_input_signed; + BitField<8, 2, Register::Size> dst_size; + BitField<12, 1, u64> is_output_signed; + + union { + BitField<39, 2, u64> tab5cb8_2; + } i2f; union { BitField<39, 2, F2iRoundingOp> rounding; } f2i; union { - BitField<39, 4, F2fRoundingOp> rounding; + BitField<8, 2, Register::Size> src_size; + BitField<10, 2, Register::Size> dst_size; + BitField<39, 4, u64> rounding; + // H0, H1 extract for F16 missing + BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value + F2fRoundingOp GetRoundingMode() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); + } } f2f; + } conversion; union { @@ -967,6 +1000,38 @@ union Instruction { } tex; union { + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; + BitField<31, 4, u64> component_mask; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<36, 1, u64> aoffi_flag; + BitField<37, 3, TextureProcessMode> process_mode; + + bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; + } + + TextureProcessMode GetTextureProcessMode() const { + return process_mode; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::AOFFI: + return aoffi_flag != 0; + default: + break; + } + return false; + } + } tex_b; + + union { BitField<22, 6, TextureQueryType> query_type; BitField<31, 4, u64> component_mask; BitField<49, 1, u64> nodep_flag; @@ -1312,7 +1377,9 @@ public: LDG, // Load from global memory STG, // Store in global memory TEX, + TEX_B, // Texture Load Bindless TXQ, // Texture Query + TXQ_B, // Texture Query Bindless TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations TLD4, // Texture Load 4 @@ -1580,7 +1647,9 @@ private: INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), + INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), + INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), @@ -1678,7 +1747,7 @@ private: INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), + INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4461083ff..52706505b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); - kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); + kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); + kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); } GPU::~GPU() = default; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index de30ea354..fe6628923 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -207,6 +207,11 @@ public: }; } regs{}; + /// Performs any additional setup necessary in order to begin GPU emulation. + /// This can be used to launch any necessary threads and register any necessary + /// core timing events. + virtual void Start() = 0; + /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index db507cf04..d4e2553a9 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -9,10 +9,14 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {} + : GPU(system, renderer), gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; +void GPUAsynch::Start() { + gpu_thread.StartThread(renderer, *dma_pusher); +} + void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 1dcc61a6c..30be74cba 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -13,16 +13,13 @@ class RendererBase; namespace VideoCommon { -namespace GPUThread { -class ThreadManager; -} // namespace GPUThread - /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch : public Tegra::GPU { public: explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); ~GPUAsynch() override; + void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 2cfc900ed..45e43b1dc 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -8,10 +8,12 @@ namespace VideoCommon { GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) - : Tegra::GPU(system, renderer) {} + : GPU(system, renderer) {} GPUSynch::~GPUSynch() = default; +void GPUSynch::Start() {} + void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); dma_pusher->DispatchCalls(); diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 766b5631c..3031fcf72 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -18,6 +18,7 @@ public: explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); ~GPUSynch() override; + void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index cc56cf467..1e2ff46b0 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p renderer.Rasterizer().FlushRegion(data->addr, data->size); } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { renderer.Rasterizer().InvalidateRegion(data->addr, data->size); - } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) { + } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { return; } else { UNREACHABLE(); @@ -55,19 +55,24 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p } } -ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, - Tegra::DmaPusher& dma_pusher) - : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} { - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); -} +ThreadManager::ThreadManager(Core::System& system) : system{system} {} ThreadManager::~ThreadManager() { + if (!thread.joinable()) { + return; + } + // Notify GPU thread that a shutdown is pending PushCommand(EndProcessingCommand()); thread.join(); } +void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { + thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; + synchronization_event = system.CoreTiming().RegisterEvent( + "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); +} + void ThreadManager::SubmitList(Tegra::CommandList&& entries) { const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; @@ -113,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) { // Wait for the GPU to be idle (all commands to be executed) { MICROPROFILE_SCOPE(GPU_wait); - std::unique_lock<std::mutex> lock{synchronization_mutex}; + std::unique_lock lock{synchronization_mutex}; synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); } } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 62bcea5bb..05a168a72 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -81,12 +81,6 @@ struct CommandDataContainer { CommandDataContainer(CommandData&& data, u64 next_fence) : data{std::move(data)}, fence{next_fence} {} - CommandDataContainer& operator=(const CommandDataContainer& t) { - data = std::move(t.data); - fence = t.fence; - return *this; - } - CommandData data; u64 fence{}; }; @@ -109,7 +103,7 @@ struct SynchState final { void TrySynchronize() { if (IsSynchronized()) { - std::lock_guard<std::mutex> lock{synchronization_mutex}; + std::lock_guard lock{synchronization_mutex}; synchronization_condition.notify_one(); } } @@ -138,10 +132,12 @@ struct SynchState final { /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, - Tegra::DmaPusher& dma_pusher); + explicit ThreadManager(Core::System& system); ~ThreadManager(); + /// Creates and starts the GPU thread. + void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 524d9ea5a..fbea107ca 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -118,10 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { static_cast<u32>(opcode.operation.Value())); } - if (opcode.is_exit) { + // An instruction with the Exit flag will not actually + // cause an exit if it's executed inside a delay slot. + if (opcode.is_exit && !is_delay_slot) { // Exit has a delay slot, execute the next instruction - // Note: Executing an exit during a branch delay slot will cause the instruction at the - // branch target to be executed before exiting. Step(offset, true); return false; } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0f4e820aa..5d8d126c1 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste UpdatePageTableForVMA(initial_vma); } +MemoryManager::~MemoryManager() = default; + GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; @@ -199,7 +201,15 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } -void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { +bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { + const GPUVAddr end = start + size; + const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); + const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); + const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); + return range == size; +} + +void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -226,7 +236,30 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t } } -void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { +void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, + const std::size_t size) const { + std::size_t remaining_size{size}; + std::size_t page_index{src_addr >> page_bits}; + std::size_t page_offset{src_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + const u8* page_pointer = page_table.pointers[page_index]; + if (page_pointer) { + const u8* src_ptr{page_pointer + page_offset}; + std::memcpy(dest_buffer, src_ptr, copy_amount); + } else { + std::memset(dest_buffer, 0, copy_amount); + } + page_index++; + page_offset = 0; + dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; + remaining_size -= copy_amount; + } +} + +void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; @@ -253,7 +286,28 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std:: } } -void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { +void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, + const std::size_t size) { + std::size_t remaining_size{size}; + std::size_t page_index{dest_addr >> page_bits}; + std::size_t page_offset{dest_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + u8* page_pointer = page_table.pointers[page_index]; + if (page_pointer) { + u8* dest_ptr{page_pointer + page_offset}; + std::memcpy(dest_ptr, src_buffer, copy_amount); + } + page_index++; + page_offset = 0; + src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; + remaining_size -= copy_amount; + } +} + +void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -281,6 +335,12 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t } } +void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { + std::vector<u8> tmp_buffer(size); + ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); + WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); +} + void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, VAddr backing_addr) { LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 647cbf93a..113f9d8f3 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -47,7 +47,8 @@ struct VirtualMemoryArea { class MemoryManager final { public: - MemoryManager(VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); + ~MemoryManager(); GPUVAddr AllocateSpace(u64 size, u64 align); GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); @@ -65,10 +66,33 @@ public: u8* GetPointer(GPUVAddr addr); const u8* GetPointer(GPUVAddr addr) const; + /// Returns true if the block is continuous in host memory, false otherwise + bool IsBlockContinuous(GPUVAddr start, std::size_t size) const; + + /** + * ReadBlock and WriteBlock are full read and write operations over virtual + * GPU Memory. It's important to use these when GPU memory may not be continuous + * in the Host Memory counterpart. Note: This functions cause Host GPU Memory + * Flushes and Invalidations, respectively to each operation. + */ void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + /** + * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and + * WriteBlock respectively. In this versions, no flushing or invalidation is actually + * done and their performance is similar to a memcpy. This functions can be used + * on either of this 2 scenarios instead of their safe counterpart: + * - Memory which is sure to never be represented in the Host GPU. + * - Memory Managed by a Cache Manager. Example: Texture Flushing should use + * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture + * being flushed. + */ + void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; + void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); + void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + private: using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; using VMAHandle = VMAMap::const_iterator; @@ -88,10 +112,10 @@ private: /** * Maps an unmanaged host memory pointer at a given address. * - * @param target The guest address to start the mapping at. - * @param memory The memory to be mapped. - * @param size Size of the mapping. - * @param state MemoryState tag to attach to the VMA. + * @param target The guest address to start the mapping at. + * @param memory The memory to be mapped. + * @param size Size of the mapping in bytes. + * @param backing_addr The base address of the range to back this mapping. */ VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr); @@ -101,7 +125,7 @@ private: /// Converts a VMAHandle to a mutable VMAIter. VMAIter StripIterConstness(const VMAHandle& iter); - /// Marks as the specfied VMA as allocated. + /// Marks as the specified VMA as allocated. VMAIter Allocate(VMAIter vma); /** diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 291772186..0c4ea1494 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -37,9 +37,6 @@ public: /// Gets the size of the shader in guest memory, required for cache management virtual std::size_t GetSizeInBytes() const = 0; - /// Wriets any cached resources back to memory - virtual void Flush() = 0; - /// Sets whether the cached object should be considered registered void SetIsRegistered(bool registered) { is_registered = registered; @@ -147,8 +144,9 @@ protected: object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); + const CacheAddr addr = object->GetCacheAddr(); interval_cache.subtract({GetInterval(object), ObjectSet{object}}); - map_cache.erase(object->GetCacheAddr()); + map_cache.erase(addr); } /// Returns a ticks counter used for tracking when cached objects were last modified @@ -158,6 +156,8 @@ protected: return ++modified_ticks; } + virtual void FlushObjectInner(const T& object) = 0; + /// Flushes the specified object, updating appropriate cache state as needed void FlushObject(const T& object) { std::lock_guard lock{mutex}; @@ -165,7 +165,7 @@ protected: if (!object->IsDirty()) { return; } - object->Flush(); + FlushObjectInner(object); object->MarkAsModified(false, *this); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fc33aa433..f9247a40e 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -42,9 +42,6 @@ public: return alignment; } - // We do not have to flush this cache as things in it are never modified by us. - void Flush() override {} - private: VAddr cpu_addr{}; std::size_t size{}; @@ -75,6 +72,9 @@ public: protected: void AlignBuffer(std::size_t alignment); + // We do not have to flush this cache as things in it are never modified by us. + void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + private: OGLStreamBuffer stream_buffer; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp new file mode 100644 index 000000000..b6d9e0ddb --- /dev/null +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -0,0 +1,45 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstddef> +#include <glad/glad.h> + +#include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_device.h" + +namespace OpenGL { + +namespace { +template <typename T> +T GetInteger(GLenum pname) { + GLint temporary; + glGetIntegerv(pname, &temporary); + return static_cast<T>(temporary); +} +} // Anonymous namespace + +Device::Device() { + uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + has_variable_aoffi = TestVariableAoffi(); +} + +bool Device::TestVariableAoffi() { + const GLchar* AOFFI_TEST = R"(#version 430 core +uniform sampler2D tex; +uniform ivec2 variable_offset; +void main() { + gl_Position = textureOffset(tex, vec2(0), variable_offset); +} +)"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)}; + GLint link_status{}; + glGetProgramiv(shader, GL_LINK_STATUS, &link_status); + glDeleteProgram(shader); + + const bool supported{link_status == GL_TRUE}; + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported); + return supported; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h new file mode 100644 index 000000000..78ff5ee58 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_device.h @@ -0,0 +1,30 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> + +namespace OpenGL { + +class Device { +public: + Device(); + + std::size_t GetUniformBufferAlignment() const { + return uniform_buffer_alignment; + } + + bool HasVariableAoffi() const { + return has_variable_aoffi; + } + +private: + static bool TestVariableAoffi(); + + std::size_t uniform_buffer_alignment{}; + bool has_variable_aoffi{}; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 8d9ee81f1..ea4a593af 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -14,28 +14,28 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, + max_size{max_size} { buffer.Create(); - // Bind and unbind the buffer so it gets allocated by the driver - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } -void CachedGlobalRegion::Reload(u32 size_) { - constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); +CachedGlobalRegion::~CachedGlobalRegion() = default; +void CachedGlobalRegion::Reload(u32 size_) { size = size_; if (size > max_size) { size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, max_size); } + glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); +} - // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); +void CachedGlobalRegion::Flush() { + LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); + glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { @@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, - u8* host_ptr) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, + u32 size) { GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); - region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; + ASSERT(cpu_addr); + + region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); ReserveGlobalRegion(region); } region->Reload(size); @@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} {} + : RasterizerCache{rasterizer} { + GLint max_ssbo_size_; + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); + max_ssbo_size = static_cast<u32>(max_ssbo_size_); +} GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, @@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()}; const auto actual_addr{memory_manager.Read<u64>(addr)}; @@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); + region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 5a21ab66f..2d467a240 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -19,7 +19,7 @@ namespace OpenGL { namespace GLShader { class GlobalMemoryEntry; -} // namespace GLShader +} class RasterizerOpenGL; class CachedGlobalRegion; @@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); + explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); + ~CachedGlobalRegion(); VAddr GetCpuAddr() const override { return cpu_addr; @@ -45,14 +46,14 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - // TODO(Rodrigo): When global memory is written (STG), implement flushing - void Flush() override { - UNIMPLEMENTED(); - } + void Flush(); private: VAddr cpu_addr{}; + u8* host_ptr{}; u32 size{}; + u32 max_size{}; + OGLBuffer buffer; }; @@ -64,12 +65,18 @@ public: GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); +protected: + void FlushObjectInner(const GlobalRegion& object) override { + object->Flush(); + } + private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<CacheAddr, GlobalRegion> reserve; + u32 max_ssbo_size{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d250d5cbb..dbd8049f5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -99,22 +99,14 @@ struct FramebufferCacheKey { }; RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, + : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { - // Create sampler objects - for (std::size_t i = 0; i < texture_samplers.size(); ++i) { - texture_samplers[i].Create(); - state.texture_units[i].sampler = texture_samplers[i].sampler.handle; - } - OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); - LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); CheckExtensions(); } @@ -269,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() { // MakeQuadArray always generates u32 indexes params.index_format = GL_UNSIGNED_INT; params.count = (regs.vertex_buffer.count / 4) * 6; - params.index_buffer_offset = - primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count); + params.index_buffer_offset = primitive_assembler.MakeQuadArray( + regs.vertex_buffer.first, regs.vertex_buffer.count); } return params; } @@ -313,6 +305,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { case Maxwell::ShaderProgram::Geometry: shader_program_manager->UseTrivialGeometryShader(); break; + default: + break; } continue; } @@ -321,8 +315,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu, stage); - const GLintptr offset = buffer_cache.UploadHostMemory( - &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); + const GLintptr offset = + buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); // Bind the emulation info buffer bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, @@ -582,9 +576,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( } void RasterizerOpenGL::Clear() { - const auto prev_state{state}; - SCOPE_EXIT({ prev_state.Apply(); }); - const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; @@ -656,7 +647,10 @@ void RasterizerOpenGL::Clear() { clear_state.EmulateViewportWithScissor(); } - clear_state.Apply(); + clear_state.ApplyColorMask(); + clear_state.ApplyDepth(); + clear_state.ApplyStencilTest(); + clear_state.ApplyViewport(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -706,23 +700,24 @@ void RasterizerOpenGL::DrawArrays() { // Add space for index buffer (keeping in mind non-core primitives) switch (regs.draw.topology) { case Maxwell::PrimitiveTopology::Quads: - buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + + buffer_size = Common::AlignUp(buffer_size, 4) + primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); break; default: if (is_indexed) { - buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize(); + buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); } break; } // Uniform space for the 5 shader stages - buffer_size = - Common::AlignUp<std::size_t>(buffer_size, 4) + - (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; + buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + + (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * + Maxwell::MaxShaderStage; // Add space for at least 18 constant buffers - buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); + buffer_size += + Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); const bool invalidate = buffer_cache.Map(buffer_size); if (invalidate) { @@ -756,6 +751,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } res_cache.FlushRegion(addr, size); + global_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -812,92 +808,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SamplerInfo::Create() { - sampler.Create(); - mag_filter = Tegra::Texture::TextureFilter::Linear; - min_filter = Tegra::Texture::TextureFilter::Linear; - wrap_u = Tegra::Texture::WrapMode::Wrap; - wrap_v = Tegra::Texture::WrapMode::Wrap; - wrap_p = Tegra::Texture::WrapMode::Wrap; - use_depth_compare = false; - depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; - - // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); - - // Other attributes have correct defaults -} - -void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { - const GLuint sampler_id = sampler.handle; - if (mag_filter != config.mag_filter) { - mag_filter = config.mag_filter; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - } - if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { - min_filter = config.min_filter; - mipmap_filter = config.mipmap_filter; - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); - } - - if (wrap_u != config.wrap_u) { - wrap_u = config.wrap_u; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); - } - if (wrap_v != config.wrap_v) { - wrap_v = config.wrap_v; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); - } - if (wrap_p != config.wrap_p) { - wrap_p = config.wrap_p; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); - } - - if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { - use_depth_compare = enabled; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - } - - if (depth_compare_func != config.depth_compare_func) { - depth_compare_func = config.depth_compare_func; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(depth_compare_func)); - } - - if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { - border_color = new_border_color; - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); - } - - if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { - max_anisotropic = anisotropic; - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); - } - } - - if (const float min = config.GetMinLod(); min_lod != min) { - min_lod = min; - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); - } - if (const float max = config.GetMaxLod(); max_lod != max) { - max_lod = max; - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); - } - - if (const float bias = config.GetLodBias(); lod_bias != bias) { - lod_bias = bias; - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); - } -} - void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLuint program_handle, BaseBindings base_bindings) { @@ -939,8 +849,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - const GLintptr const_buffer_offset = buffer_cache.UploadMemory( - buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); + const GLintptr const_buffer_offset = + buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); } @@ -953,6 +863,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + if (entry.IsWritten()) { + region->MarkAsModified(true, global_cache); + } bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, static_cast<GLsizeiptr>(region->GetSizeInBytes())); } @@ -970,10 +883,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + Tegra::Texture::FullTextureInfo texture; + if (entry.IsBindless()) { + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); + texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + } else { + texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const u32 current_bindpoint = base_bindings.sampler + bindpoint; - texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); + state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { state.texture_units[current_bindpoint].texture = @@ -1001,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { viewport.y = viewport_rect.bottom; viewport.width = viewport_rect.GetWidth(); viewport.height = viewport_rect.GetHeight(); - viewport.depth_range_far = regs.viewports[i].depth_range_far; - viewport.depth_range_near = regs.viewports[i].depth_range_near; + viewport.depth_range_far = src.depth_range_far; + viewport.depth_range_near = src.depth_range_near; } state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; @@ -1214,7 +1135,9 @@ void RasterizerOpenGL::SyncTransformFeedback() { void RasterizerOpenGL::SyncPointState() { const auto& regs = system.GPU().Maxwell3D().regs; - state.point.size = regs.point_size; + // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid + // in OpenGL). + state.point.size = std::max(1.0f, regs.point_size); } void RasterizerOpenGL::SyncPolygonOffset() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e4c64ae71..71b9c5ead 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -21,10 +21,12 @@ #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" @@ -71,39 +73,7 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - static constexpr std::size_t MaxGlobalMemorySize = 0x10000; - static_assert(MaxGlobalMemorySize % sizeof(float) == 0, - "The maximum size of a global memory must be a multiple of the size of float"); - private: - class SamplerInfo { - public: - OGLSampler sampler; - - /// Creates the sampler object, initializing its state so that it's in sync with the - /// SamplerInfo struct. - void Create(); - /// Syncs the sampler object with the config, updating any necessary state. - void SyncWithConfig(const Tegra::Texture::TSCEntry& info); - - private: - Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureMipmapFilter mipmap_filter = - Tegra::Texture::TextureMipmapFilter::None; - Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; - bool use_depth_compare = false; - Tegra::Texture::DepthCompareFunc depth_compare_func = - Tegra::Texture::DepthCompareFunc::Always; - GLvec4 border_color = {}; - float min_lod = 0.0f; - float max_lod = 16.0f; - float lod_bias = 0.0f; - float max_anisotropic = 1.0f; - }; - struct FramebufferConfigState { bool using_color_fb{}; bool using_depth_fb{}; @@ -203,14 +173,15 @@ private: /// but are needed for correct emulation void CheckExtensions(); + const Device device; OpenGLState state; RasterizerCacheOpenGL res_cache; ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; + SamplerCacheOpenGL sampler_cache; Core::System& system; - ScreenInfo& screen_info; std::unique_ptr<GLShader::ProgramManager> shader_program_manager; @@ -223,12 +194,9 @@ private: FramebufferConfigState current_framebuffer_config_state; std::pair<bool, bool> current_depth_stencil_usage{}; - std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; PrimitiveAssembler primitive_assembler{buffer_cache}; - GLint uniform_buffer_alignment; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f2ffc4710..a7681902e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -281,10 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; - if (!params.is_tiled) { - const u32 bpp = params.GetFormatBpp() / 8; - params.pitch = config.width * bpp; - } + params.pitch = config.pitch; params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; @@ -631,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params) } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGLBuffer() { +void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - gl_buffer.resize(params.max_mip_level); + auto& gl_buffer = res_cache_tmp_mem.gl_buffer; + if (gl_buffer.size() < params.max_mip_level) + gl_buffer.resize(params.max_mip_level); for (u32 i = 0; i < params.max_mip_level; i++) gl_buffer[i].resize(params.GetMipmapSizeGL(i)); if (params.is_tiled) { @@ -643,13 +642,16 @@ void CachedSurface::LoadGLBuffer() { SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); } else { const u32 bpp = params.GetFormatBpp() / 8; - const u32 copy_size = params.width * bpp; + const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) / + GetDefaultBlockWidth(params.pixel_format); if (params.pitch == copy_size) { std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); } else { + const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) / + GetDefaultBlockHeight(params.pixel_format); const u8* start{params.host_ptr}; u8* write_to = gl_buffer[0].data(); - for (u32 h = params.height; h > 0; h--) { + for (u32 h = height; h > 0; h--) { std::memcpy(write_to, start, copy_size); start += params.pitch; write_to += copy_size; @@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() { } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer() { +void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented"); + auto& gl_buffer = res_cache_tmp_mem.gl_buffer; // OpenGL temporary buffer needs to be big enough to store raw texture size - gl_buffer.resize(1); gl_buffer[0].resize(GetSizeInBytes()); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); @@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() { } } -void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, - GLuint draw_fb_handle) { +void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, + GLuint read_fb_handle, GLuint draw_fb_handle) { const auto& rect{params.GetRect(mip_map)}; + auto& gl_buffer = res_cache_tmp_mem.gl_buffer; + // Load data from memory to the surface const auto x0 = static_cast<GLint>(rect.left); const auto y0 = static_cast<GLint>(rect.bottom); @@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, tuple.type, &gl_buffer[mip_map][buffer_offset]); break; case SurfaceTarget::TextureCubemap: { - std::size_t start = buffer_offset; for (std::size_t face = 0; face < params.depth; ++face) { glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face), static_cast<GLsizei>(rect.GetWidth()), @@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() { } MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { +void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, + GLuint read_fb_handle, GLuint draw_fb_handle) { MICROPROFILE_SCOPE(OpenGL_TextureUL); for (u32 i = 0; i < params.max_mip_level; i++) - UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle); + UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle); } void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, @@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { - surface->LoadGLBuffer(); - surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->LoadGLBuffer(temporal_memory); + surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); surface->MarkForReload(false); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index db280dbb3..6263ef3e7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -355,6 +355,12 @@ namespace OpenGL { class RasterizerOpenGL; +// This is used to store temporary big buffers, +// instead of creating/destroying all the time +struct RasterizerTemporaryMemory { + std::vector<std::vector<u8>> gl_buffer; +}; + class CachedSurface final : public RasterizerCacheObject { public: explicit CachedSurface(const SurfaceParams& params); @@ -371,10 +377,6 @@ public: return memory_size; } - void Flush() override { - FlushGLBuffer(); - } - const OGLTexture& Texture() const { return texture; } @@ -397,11 +399,12 @@ public: } // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(); - void FlushGLBuffer(); + void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); + void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); // Upload data in gl_buffer to this surface's texture - void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle, + GLuint draw_fb_handle); void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, Tegra::Texture::SwizzleSource swizzle_y, @@ -429,13 +432,13 @@ public: } private: - void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); + void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, + GLuint read_fb_handle, GLuint draw_fb_handle); void EnsureTextureDiscrepantView(); OGLTexture texture; OGLTexture discrepant_view; - std::vector<std::vector<u8>> gl_buffer; SurfaceParams params{}; GLenum gl_target{}; GLenum gl_internal_format{}; @@ -473,6 +476,11 @@ public: void SignalPreDrawCall(); void SignalPostDrawCall(); +protected: + void FlushObjectInner(const Surface& object) override { + object->FlushGLBuffer(temporal_memory); + } + private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); @@ -519,6 +527,8 @@ private: std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; + RasterizerTemporaryMemory temporal_memory; + using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; using SurfaceInterval = typename SurfaceIntervalCache::interval_type; diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp new file mode 100644 index 000000000..3ded5ecea --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp @@ -0,0 +1,52 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace OpenGL { + +SamplerCacheOpenGL::SamplerCacheOpenGL() = default; + +SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; + +OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + OGLSampler sampler; + sampler.Create(); + + const GLuint sampler_id{sampler.handle}; + glSamplerParameteri( + sampler_id, GL_TEXTURE_MAG_FILTER, + MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); + glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, + tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, + MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); + glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); + if (GLAD_GL_ARB_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); + } else if (GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); + } else if (tsc.GetMaxAnisotropy() != 1) { + LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); + } + + return sampler; +} + +GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { + return sampler.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h new file mode 100644 index 000000000..defbc2d81 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h @@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/sampler_cache.h" + +namespace OpenGL { + +class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { +public: + explicit SamplerCacheOpenGL(); + ~SamplerCacheOpenGL(); + +protected: + OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; + + GLuint ToSamplerType(const OGLSampler& sampler) const; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 99f67494c..f700dc89a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -38,13 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { } /// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(const u8* host_ptr) { +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, + const u8* host_ptr) { ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); ASSERT_OR_EXECUTE(host_ptr != nullptr, { std::fill(program_code.begin(), program_code.end(), 0); return program_code; }); - std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); + memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), + program_code.size() * sizeof(u64)); return program_code; } @@ -134,8 +136,8 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& } /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code, - ProgramCode program_code_b) { +GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b) { GLShader::ShaderSetup setup(program_code); if (program_type == Maxwell::ShaderProgram::VertexA) { // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. @@ -149,11 +151,11 @@ GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, Progr switch (program_type) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: - return GLShader::GenerateVertexShader(setup); + return GLShader::GenerateVertexShader(device, setup); case Maxwell::ShaderProgram::Geometry: - return GLShader::GenerateGeometryShader(setup); + return GLShader::GenerateGeometryShader(device, setup); case Maxwell::ShaderProgram::Fragment: - return GLShader::GenerateFragmentShader(setup); + return GLShader::GenerateFragmentShader(device, setup); default: LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); UNREACHABLE(); @@ -212,22 +214,20 @@ std::set<GLenum> GetSupportedFormats() { return supported_formats; } -} // namespace +} // Anonymous namespace -CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, +CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { - - const std::size_t code_size = CalculateProgramSize(program_code); - const std::size_t code_size_b = - program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b); - - GLShader::ProgramResult program_result = - CreateProgram(program_type, program_code, program_code_b); + const std::size_t code_size{CalculateProgramSize(program_code)}; + const std::size_t code_size_b{program_code_b.empty() ? 0 + : CalculateProgramSize(program_code_b)}; + GLShader::ProgramResult program_result{ + CreateProgram(device, program_type, program_code, program_code_b)}; if (program_result.first.empty()) { // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now return; @@ -251,7 +251,6 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{ precompiled_programs} { - code = std::move(result.first); entries = result.second; shader_length = entries.shader_length; @@ -344,8 +343,9 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, return {unique_identifier, base_bindings, primitive_mode}; } -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system) - : RasterizerCache{rasterizer}, disk_cache{system} {} +ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, + const Device& device) + : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -363,6 +363,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (stop_loading) return; + // Track if precompiled cache was altered during loading to know if we have to serialize the + // virtual precompiled cache file back to the hard drive + bool precompiled_cache_altered = false; + // Build shaders if (callback) callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); @@ -384,6 +388,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (!shader) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); + precompiled_cache_altered = true; dumps.clear(); } } @@ -405,8 +410,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (dumps.find(usage) == dumps.end()) { const auto& program = precompiled_programs.at(usage); disk_cache.SaveDump(usage, program->handle); + precompiled_cache_altered = true; } } + + if (precompiled_cache_altered) { + disk_cache.SaveVirtualPrecompiledFile(); + } } CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( @@ -439,17 +449,18 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { std::unordered_map<u64, UnspecializedShader> unspecialized; - if (callback) + if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); + } for (std::size_t i = 0; i < raws.size(); ++i) { - if (stop_loading) + if (stop_loading) { return {}; - + } const auto& raw{raws[i]}; - const u64 unique_identifier = raw.GetUniqueIdentifier(); - const u64 calculated_hash = - GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); + const u64 unique_identifier{raw.GetUniqueIdentifier()}; + const u64 calculated_hash{ + GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; if (unique_identifier != calculated_hash) { LOG_ERROR( Render_OpenGL, @@ -466,8 +477,8 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia result = {stored_decompiled.code, stored_decompiled.entries}; } else { // Otherwise decompile the shader at boot and save the result to the decompiled file - result = - CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); + result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), + raw.GetProgramCodeB()); disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); } @@ -477,8 +488,9 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia {raw.GetUniqueIdentifier(), {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); - if (callback) + if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); + } } return unspecialized; } @@ -497,11 +509,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { if (!shader) { // No shader found - create a new one - ProgramCode program_code{GetShaderCode(host_ptr)}; + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; ProgramCode program_code_b; if (program == Maxwell::ShaderProgram::VertexA) { - program_code_b = GetShaderCode( - memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); + const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); } const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; @@ -512,7 +525,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { precompiled_programs, found->second, host_ptr); } else { shader = std::make_shared<CachedShader>( - cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, + device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, std::move(program_code), std::move(program_code_b), host_ptr); } Register(shader); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 0cf8e0b3d..31b979987 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -27,6 +27,7 @@ class System; namespace OpenGL { class CachedShader; +class Device; class RasterizerOpenGL; struct UnspecializedShader; @@ -38,7 +39,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; class CachedShader final : public RasterizerCacheObject { public: - explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, + explicit CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); @@ -56,9 +57,6 @@ public: return shader_length; } - // We do not have to flush this cache as things in it are never modified by us. - void Flush() override {} - /// Gets the shader entries for the shader const GLShader::ShaderEntries& GetShaderEntries() const { return entries; @@ -112,7 +110,8 @@ private: class ShaderCacheOpenGL final : public RasterizerCache<Shader> { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system); + explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, + const Device& device); /// Loads disk cache for the current game void LoadDiskCache(const std::atomic_bool& stop_loading, @@ -121,6 +120,10 @@ public: /// Gets the current specified shader stage program Shader GetStageProgram(Maxwell::ShaderProgram program); +protected: + // We do not have to flush this cache as things in it are never modified by us. + void FlushObjectInner(const Shader& object) override {} + private: std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, @@ -130,6 +133,8 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + const Device& device; + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; ShaderDiskCacheOpenGL disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 28e490b3c..1a62795e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -15,6 +15,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -45,8 +46,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>; enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); -constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); class ShaderWriter { public: @@ -121,14 +120,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { /// Returns true if an object has to be treated as precise bool IsPrecise(Operation operand) { - const auto& meta = operand.GetMeta(); - + const auto& meta{operand.GetMeta()}; if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { return arithmetic->precise; } - if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { - return half_arithmetic->precise; - } return false; } @@ -141,8 +136,9 @@ bool IsPrecise(Node node) { class GLSLDecompiler final { public: - explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix) - : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} + explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, + std::string suffix) + : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} void Decompile() { DeclareVertex(); @@ -208,8 +204,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, + usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -380,12 +378,22 @@ private: } void DeclareGlobalMemory() { - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& gmem : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem; + + // Since we don't know how the shader will use the shader, hint the driver to disable as + // much optimizations as possible + std::string qualifier = "coherent volatile"; + if (usage.is_read && !usage.is_written) + qualifier += " readonly"; + else if (usage.is_written && !usage.is_read) + qualifier += " writeonly"; + const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") buffer " + - GetGlobalMemoryBlock(entry) + " {"); - code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); + code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + + GetGlobalMemoryBlock(base) + " {"); + code.AddLine(" float " + GetGlobalMemory(base) + "[];"); code.AddLine("};"); code.AddNewLine(); } @@ -617,28 +625,7 @@ private: } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { - std::string value = VisitOperand(operation, operand_index); - switch (type) { - case Type::HalfFloat: { - const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); - if (!half_meta) { - value = "toHalf2(" + value + ')'; - } - - switch (half_meta->types.at(operand_index)) { - case Tegra::Shader::HalfType::H0_H1: - return "toHalf2(" + value + ')'; - case Tegra::Shader::HalfType::F32: - return "vec2(" + value + ')'; - case Tegra::Shader::HalfType::H0_H0: - return "vec2(toHalf2(" + value + ")[0])"; - case Tegra::Shader::HalfType::H1_H1: - return "vec2(toHalf2(" + value + ")[1])"; - } - } - default: - return CastOperand(value, type); - } + return CastOperand(VisitOperand(operation, operand_index), type); } std::string CastOperand(const std::string& value, Type type) const { @@ -652,9 +639,7 @@ private: case Type::Uint: return "ftou(" + value + ')'; case Type::HalfFloat: - // Can't be handled as a stand-alone value - UNREACHABLE(); - return value; + return "toHalf2(" + value + ')'; } UNREACHABLE(); return value; @@ -819,8 +804,12 @@ private: // Inline the string as an immediate integer in GLSL (AOFFI arguments are required // to be constant by the standard). expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { + } else if (device.HasVariableAoffi()) { + // Avoid using variable AOFFI on unsupported devices. expr += "ftoi(" + Visit(operand) + ')'; + } else { + // Insert 0 on devices not supporting variable AOFFI. + expr += '0'; } if (index + 1 < aoffi.size()) { expr += ", "; @@ -868,6 +857,12 @@ private: } else if (const auto lmem = std::get_if<LmemNode>(dest)) { target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; + } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -876,17 +871,6 @@ private: return {}; } - std::string Composite(Operation operation) { - std::string value = "vec4("; - for (std::size_t i = 0; i < 4; ++i) { - value += Visit(operation[i]); - if (i < 3) - value += ", "; - } - value += ')'; - return value; - } - template <Type type> std::string Add(Operation operation) { return GenerateBinaryInfix(operation, "+", type, type, type); @@ -1067,13 +1051,40 @@ private: return BitwiseCastResult(value, Type::HalfFloat); } + std::string HClamp(Operation operation) { + const std::string value = VisitOperand(operation, 0, Type::HalfFloat); + const std::string min = VisitOperand(operation, 1, Type::Float); + const std::string max = VisitOperand(operation, 2, Type::Float); + const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; + return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); + } + + std::string HUnpack(Operation operation) { + const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; + const auto value = [&]() -> std::string { + switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { + case Tegra::Shader::HalfType::H0_H1: + return operand; + case Tegra::Shader::HalfType::F32: + return "vec2(fromHalf2(" + operand + "))"; + case Tegra::Shader::HalfType::H0_H0: + return "vec2(" + operand + "[0])"; + case Tegra::Shader::HalfType::H1_H1: + return "vec2(" + operand + "[1])"; + } + UNREACHABLE(); + return "0"; + }(); + return "fromHalf2(" + value + ')'; + } + std::string HMergeF32(Operation operation) { return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; } std::string HMergeH0(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + - Visit(operation[1]) + ")[0]))"; + return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + + Visit(operation[0]) + ")[1]))"; } std::string HMergeH1(Operation operation) { @@ -1173,34 +1184,46 @@ private: return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); } + template <bool with_nan> + std::string GenerateHalfComparison(Operation operation, std::string compare_op) { + std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, + Type::HalfFloat, Type::HalfFloat)}; + if constexpr (!with_nan) { + return comparison; + } + return "halfFloatNanComparison(" + comparison + ", " + + VisitOperand(operation, 0, Type::HalfFloat) + ", " + + VisitOperand(operation, 1, Type::HalfFloat) + ')'; + } + + template <bool with_nan> std::string Logical2HLessThan(Operation operation) { - return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "lessThan"); } + template <bool with_nan> std::string Logical2HEqual(Operation operation) { - return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "equal"); } + template <bool with_nan> std::string Logical2HLessEqual(Operation operation) { - return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); } + template <bool with_nan> std::string Logical2HGreaterThan(Operation operation) { - return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "greaterThan"); } + template <bool with_nan> std::string Logical2HNotEqual(Operation operation) { - return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "notEqual"); } + template <bool with_nan> std::string Logical2HGreaterEqual(Operation operation) { - return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); } std::string Texture(Operation operation) { @@ -1489,6 +1512,8 @@ private: &GLSLDecompiler::Fma<Type::HalfFloat>, &GLSLDecompiler::Absolute<Type::HalfFloat>, &GLSLDecompiler::HNegate, + &GLSLDecompiler::HClamp, + &GLSLDecompiler::HUnpack, &GLSLDecompiler::HMergeF32, &GLSLDecompiler::HMergeH0, &GLSLDecompiler::HMergeH1, @@ -1525,12 +1550,18 @@ private: &GLSLDecompiler::LogicalNotEqual<Type::Uint>, &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, + &GLSLDecompiler::Logical2HLessThan<false>, + &GLSLDecompiler::Logical2HEqual<false>, + &GLSLDecompiler::Logical2HLessEqual<false>, + &GLSLDecompiler::Logical2HGreaterThan<false>, + &GLSLDecompiler::Logical2HNotEqual<false>, + &GLSLDecompiler::Logical2HGreaterEqual<false>, + &GLSLDecompiler::Logical2HLessThan<true>, + &GLSLDecompiler::Logical2HEqual<true>, + &GLSLDecompiler::Logical2HLessEqual<true>, + &GLSLDecompiler::Logical2HGreaterThan<true>, + &GLSLDecompiler::Logical2HNotEqual<true>, + &GLSLDecompiler::Logical2HGreaterEqual<true>, &GLSLDecompiler::Texture, &GLSLDecompiler::TextureLod, @@ -1609,6 +1640,7 @@ private: return name + '_' + std::to_string(index) + '_' + suffix; } + const Device& device; const ShaderIR& ir; const ShaderStage stage; const std::string suffix; @@ -1621,9 +1653,7 @@ private: std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" @@ -1633,11 +1663,18 @@ std::string GetCommonDeclarations() { "}\n\n" "vec2 toHalf2(float value) {\n" " return unpackHalf2x16(ftou(value));\n" + "}\n\n" + "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" + " bvec2 is_nan1 = isnan(pair1);\n" + " bvec2 is_nan2 = isnan(pair2);\n" + " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " + "is_nan2.y);\n" "}\n"; } -ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) { - GLSLDecompiler decompiler(ir, stage, suffix); +ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, + const std::string& suffix) { + GLSLDecompiler decompiler(device, ir, stage, suffix); decompiler.Decompile(); return {decompiler.GetResult(), decompiler.GetShaderEntries()}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 4e04ab2f8..c1569e737 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -12,6 +12,10 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/shader/shader_ir.h" +namespace OpenGL { +class Device; +} + namespace VideoCommon::Shader { class ShaderIR; } @@ -39,8 +43,9 @@ private: class GlobalMemoryEntry { public: - explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ + is_written} {} u32 GetCbufIndex() const { return cbuf_index; @@ -50,14 +55,25 @@ public: return cbuf_offset; } + bool IsRead() const { + return is_read; + } + + bool IsWritten() const { + return is_written; + } + private: u32 cbuf_index{}; u32 cbuf_offset{}; + bool is_read{}; + bool is_written{}; }; struct ShaderEntries { std::vector<ConstBufferEntry> const_buffers; std::vector<SamplerEntry> samplers; + std::vector<SamplerEntry> bindless_samplers; std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; @@ -65,7 +81,7 @@ struct ShaderEntries { std::string GetCommonDeclarations(); -ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, - const std::string& suffix); +ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage, const std::string& suffix); -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 8a43eb157..fba9c594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -106,6 +106,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} +ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; + std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> ShaderDiskCacheOpenGL::LoadTransferable() { // Skip games without title id @@ -177,6 +179,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {}; } } + return {{raws, usages}}; } @@ -208,59 +211,64 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { + // Read compressed file from disk and decompress to virtual precompiled cache file + std::vector<u8> compressed(file.GetSize()); + file.ReadBytes(compressed.data(), compressed.size()); + const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); + SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); + precompiled_cache_virtual_file_offset = 0; + ShaderCacheVersionHash file_hash{}; - if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) { + if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { + precompiled_cache_virtual_file_offset = 0; return {}; } if (GetShaderCacheVersionHash() != file_hash) { LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); + precompiled_cache_virtual_file_offset = 0; return {}; } std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps; - while (file.Tell() < file.GetSize()) { + while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { PrecompiledEntryKind kind{}; - if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { + if (!LoadObjectFromPrecompiled(kind)) { return {}; } switch (kind) { case PrecompiledEntryKind::Decompiled: { u64 unique_identifier{}; - if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64)) + if (!LoadObjectFromPrecompiled(unique_identifier)) { return {}; + } - const auto entry = LoadDecompiledEntry(file); - if (!entry) + auto entry = LoadDecompiledEntry(); + if (!entry) { return {}; + } decompiled.insert({unique_identifier, std::move(*entry)}); break; } case PrecompiledEntryKind::Dump: { ShaderDiskCacheUsage usage; - if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) + if (!LoadObjectFromPrecompiled(usage)) { return {}; + } ShaderDiskCacheDump dump; - if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32)) - return {}; - - u32 binary_length{}; - u32 compressed_size{}; - if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) { + if (!LoadObjectFromPrecompiled(dump.binary_format)) { return {}; } - std::vector<u8> compressed_binary(compressed_size); - if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) != - compressed_binary.size()) { + u32 binary_length{}; + if (!LoadObjectFromPrecompiled(binary_length)) { return {}; } - dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary); - if (dump.binary.empty()) { + dump.binary.resize(binary_length); + if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { return {}; } @@ -274,143 +282,151 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {{decompiled, dumps}}; } -std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry( - FileUtil::IOFile& file) { +std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { u32 code_size{}; - u32 compressed_code_size{}; - if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) { + if (!LoadObjectFromPrecompiled(code_size)) { return {}; } - std::vector<u8> compressed_code(compressed_code_size); - if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) { + std::string code(code_size, '\0'); + if (!LoadArrayFromPrecompiled(code.data(), code.size())) { return {}; } - const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code); - if (code.empty()) { - return {}; - } ShaderDiskCacheDecompiled entry; - entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size); + entry.code = std::move(code); u32 const_buffers_count{}; - if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32)) + if (!LoadObjectFromPrecompiled(const_buffers_count)) { return {}; + } + for (u32 i = 0; i < const_buffers_count; ++i) { u32 max_offset{}; u32 index{}; - u8 is_indirect{}; - if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) { + bool is_indirect{}; + if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || + !LoadObjectFromPrecompiled(is_indirect)) { return {}; } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index); + entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); } u32 samplers_count{}; - if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32)) + if (!LoadObjectFromPrecompiled(samplers_count)) { return {}; + } + for (u32 i = 0; i < samplers_count; ++i) { u64 offset{}; u64 index{}; u32 type{}; - u8 is_array{}; - u8 is_shadow{}; - if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || - file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || - file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || - file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) { + bool is_array{}; + bool is_shadow{}; + bool is_bindless{}; + if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || + !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || + !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { return {}; } entry.entries.samplers.emplace_back( static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0); + static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); } u32 global_memory_count{}; - if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32)) + if (!LoadObjectFromPrecompiled(global_memory_count)) { return {}; + } + for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; - if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { + bool is_read{}; + bool is_written{}; + if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || + !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, + is_written); } for (auto& clip_distance : entry.entries.clip_distances) { - u8 clip_distance_raw{}; - if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8)) + if (!LoadObjectFromPrecompiled(clip_distance)) { return {}; - clip_distance = clip_distance_raw != 0; + } } u64 shader_length{}; - if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64)) + if (!LoadObjectFromPrecompiled(shader_length)) { return {}; + } + entry.entries.shader_length = static_cast<std::size_t>(shader_length); return entry; } -bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, - const std::string& code, - const std::vector<u8>& compressed_code, +bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, const GLShader::ShaderEntries& entries) { - if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 || - file.WriteObject(unique_identifier) != 1 || - file.WriteObject(static_cast<u32>(code.size())) != 1 || - file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 || - file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) { + if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || + !SaveObjectToPrecompiled(unique_identifier) || + !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || + !SaveArrayToPrecompiled(code.data(), code.size())) { return false; } - if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1) + if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { return false; + } for (const auto& cbuf : entries.const_buffers) { - if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 || - file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 || - file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) { + if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || + !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || + !SaveObjectToPrecompiled(cbuf.IsIndirect())) { return false; } } - if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1) + if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { return false; + } for (const auto& sampler : entries.samplers) { - if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 || - file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || - file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || - file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || - file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) { + if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || + !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || + !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || + !SaveObjectToPrecompiled(sampler.IsArray()) || + !SaveObjectToPrecompiled(sampler.IsShadow()) || + !SaveObjectToPrecompiled(sampler.IsBindless())) { return false; } } - if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1) + if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { return false; + } for (const auto& gmem : entries.global_memory_entries) { - if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || - file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { + if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || + !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || + !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { return false; } } for (const bool clip_distance : entries.clip_distances) { - if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1) + if (!SaveObjectToPrecompiled(clip_distance)) { return false; + } } - return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1; + if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { + return false; + } + + return true; } -void ShaderDiskCacheOpenGL::InvalidateTransferable() const { +void ShaderDiskCacheOpenGL::InvalidateTransferable() { if (!FileUtil::Delete(GetTransferablePath())) { LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", GetTransferablePath()); @@ -418,7 +434,10 @@ void ShaderDiskCacheOpenGL::InvalidateTransferable() const { InvalidatePrecompiled(); } -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const { +void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { + // Clear virtaul precompiled cache file + precompiled_cache_virtual_file.Resize(0); + if (!FileUtil::Delete(GetPrecompiledPath())) { LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath()); } @@ -454,7 +473,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); auto& usages{it->second}; - ASSERT(usages.find(usage) == usages.end()); + if (usages.find(usage) != usages.end()) { + // Skip this variant since the shader is already stored. + return; + } usages.insert(usage); FileUtil::IOFile file = AppendTransferableFile(); @@ -474,22 +496,13 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str if (!IsUsable()) return; - const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault( - reinterpret_cast<const u8*>(code.data()), code.size())}; - if (compressed_code.empty()) { - LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", - unique_identifier); - return; + if (precompiled_cache_virtual_file.GetSize() == 0) { + SavePrecompiledHeaderToVirtualPrecompiledCache(); } - FileUtil::IOFile file = AppendPrecompiledFile(); - if (!file.IsOpen()) - return; - - if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) { + if (!SaveDecompiledFile(unique_identifier, code, entries)) { LOG_ERROR(Render_OpenGL, "Failed to save decompiled entry to the precompiled file - removing"); - file.Close(); InvalidatePrecompiled(); } } @@ -505,28 +518,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - const std::vector<u8> compressed_binary = - Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size()); - - if (compressed_binary.empty()) { - LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", - usage.unique_identifier); - return; - } - - FileUtil::IOFile file = AppendPrecompiledFile(); - if (!file.IsOpen()) - return; - - if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 || - file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 || - file.WriteObject(static_cast<u32>(binary_length)) != 1 || - file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 || - file.WriteArray(compressed_binary.data(), compressed_binary.size()) != - compressed_binary.size()) { + if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || + !SaveObjectToPrecompiled(usage) || + !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || + !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", usage.unique_identifier); - file.Close(); InvalidatePrecompiled(); return; } @@ -559,28 +557,33 @@ FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { return file; } -FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const { - if (!EnsureDirectories()) - return {}; +void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { + const auto hash{GetShaderCacheVersionHash()}; + if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { + LOG_ERROR( + Render_OpenGL, + "Failed to write precompiled cache version hash to virtual precompiled cache file"); + } +} + +void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { + precompiled_cache_virtual_file_offset = 0; + const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); + const std::vector<u8>& compressed = + Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; - const bool existed = FileUtil::Exists(precompiled_path); + FileUtil::IOFile file(precompiled_path, "wb"); - FileUtil::IOFile file(precompiled_path, "ab"); if (!file.IsOpen()) { LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); - return {}; + return; } - - if (!existed || file.GetSize() == 0) { - const auto hash{GetShaderCacheVersionHash()}; - if (file.WriteArray(hash.data(), hash.size()) != hash.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}", - precompiled_path); - return {}; - } + if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { + LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", + precompiled_path); + return; } - return file; } bool ShaderDiskCacheOpenGL::EnsureDirectories() const { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 6be0c0547..2da0a4a23 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -16,6 +16,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "core/file_sys/vfs_vector.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -69,14 +70,14 @@ namespace std { template <> struct hash<OpenGL::BaseBindings> { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const { + std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; } }; template <> struct hash<OpenGL::ShaderDiskCacheUsage> { - std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const { + std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { return static_cast<std::size_t>(usage.unique_identifier) ^ std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; } @@ -161,6 +162,7 @@ struct ShaderDiskCacheDump { class ShaderDiskCacheOpenGL { public: explicit ShaderDiskCacheOpenGL(Core::System& system); + ~ShaderDiskCacheOpenGL(); /// Loads transferable cache. If file has a old version or on failure, it deletes the file. std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> @@ -172,10 +174,10 @@ public: LoadPrecompiled(); /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable() const; + void InvalidateTransferable(); - /// Removes the precompiled cache file. - void InvalidatePrecompiled() const; + /// Removes the precompiled cache file and clears virtual precompiled cache file. + void InvalidatePrecompiled(); /// Saves a raw dump to the transferable file. Checks for collisions. void SaveRaw(const ShaderDiskCacheRaw& entry); @@ -190,18 +192,21 @@ public: /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); + /// Serializes virtual precompiled shader cache file to real file + void SaveVirtualPrecompiledFile(); + private: /// Loads the transferable cache. Returns empty on failure. std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> LoadPrecompiledFile(FileUtil::IOFile& file); - /// Loads a decompiled cache entry from the passed file. Returns empty on failure. - std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file); + /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on + /// failure. + std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); /// Saves a decompiled entry to the passed file. Returns true on success. - bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code, - const std::vector<u8>& compressed_code, + bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, const GLShader::ShaderEntries& entries); /// Returns if the cache can be used @@ -210,8 +215,8 @@ private: /// Opens current game's transferable file and write it's header if it doesn't exist FileUtil::IOFile AppendTransferableFile() const; - /// Opens current game's precompiled file and write it's header if it doesn't exist - FileUtil::IOFile AppendPrecompiledFile() const; + /// Save precompiled header to precompiled_cache_in_memory + void SavePrecompiledHeaderToVirtualPrecompiledCache(); /// Create shader disk cache directories. Returns true on success. bool EnsureDirectories() const; @@ -234,10 +239,57 @@ private: /// Get current game's title id std::string GetTitleID() const; - // Copre system + template <typename T> + bool SaveArrayToPrecompiled(const T* data, std::size_t length) { + const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( + data, length, precompiled_cache_virtual_file_offset); + precompiled_cache_virtual_file_offset += write_length; + return write_length == sizeof(T) * length; + } + + template <typename T> + bool LoadArrayFromPrecompiled(T* data, std::size_t length) { + const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( + data, length, precompiled_cache_virtual_file_offset); + precompiled_cache_virtual_file_offset += read_length; + return read_length == sizeof(T) * length; + } + + template <typename T> + bool SaveObjectToPrecompiled(const T& object) { + return SaveArrayToPrecompiled(&object, 1); + } + + bool SaveObjectToPrecompiled(bool object) { + const auto value = static_cast<u8>(object); + return SaveArrayToPrecompiled(&value, 1); + } + + template <typename T> + bool LoadObjectFromPrecompiled(T& object) { + return LoadArrayFromPrecompiled(&object, 1); + } + + bool LoadObjectFromPrecompiled(bool& object) { + u8 value; + const bool read_ok = LoadArrayFromPrecompiled(&value, 1); + if (!read_ok) { + return false; + } + + object = value != 0; + return true; + } + + // Core system Core::System& system; // Stored transferable shaders std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; + // Stores whole precompiled cache which will be read from/saved to the precompiled cache file + FileSys::VectorVfsFile precompiled_cache_virtual_file; + // Stores the current offset of the precompiled cache file for IO purposes + std::size_t precompiled_cache_virtual_file_offset = 0; + // The cache has been loaded at boot bool tried_to_load{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8763d9c71..7ab0b4553 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -16,7 +16,7 @@ using VideoCommon::Shader::ShaderIR; static constexpr u32 PROGRAM_OFFSET{10}; -ProgramResult GenerateVertexShader(const ShaderSetup& setup) { +ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; @@ -33,15 +33,16 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); - ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + ProgramResult program = + Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); out += program.first; if (setup.IsDualProgram()) { - ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); + const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); ProgramResult program_b = - Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); + Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); out += program_b.first; } @@ -57,6 +58,9 @@ void main() { } out += R"( + + // Set Position Y direction + position.y *= utof(config_pack[2]); // Check if the flip stage is VertexB // Config pack's second value is flip_stage if (config_pack[1] == 1) { @@ -72,10 +76,10 @@ void main() { } })"; - return {out, program.second}; + return {std::move(out), std::move(program.second)}; } -ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { +ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; @@ -93,9 +97,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = - Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); + Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); out += program.first; out += R"( @@ -103,10 +107,10 @@ void main() { execute_geometry(); };)"; - return {out, program.second}; + return {std::move(out), std::move(program.second)}; } -ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { +ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; @@ -156,9 +160,9 @@ bool AlphaFunc(in float value) { } )"; - ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = - Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); + Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); out += program.first; @@ -168,7 +172,7 @@ void main() { } )"; - return {out, program.second}; + return {std::move(out), std::move(program.second)}; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fad346b48..0536c8a03 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -10,6 +10,10 @@ #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/shader/shader_ir.h" +namespace OpenGL { +class Device; +} + namespace OpenGL::GLShader { using VideoCommon::Shader::ProgramCode; @@ -39,22 +43,13 @@ private: bool has_program_b{}; }; -/** - * Generates the GLSL vertex shader program source code for the given VS program - * @returns String of the shader source code - */ -ProgramResult GenerateVertexShader(const ShaderSetup& setup); - -/** - * Generates the GLSL geometry shader program source code for the given GS program - * @returns String of the shader source code - */ -ProgramResult GenerateGeometryShader(const ShaderSetup& setup); - -/** - * Generates the GLSL fragment shader program source code for the given FS program - * @returns String of the shader source code - */ -ProgramResult GenerateFragmentShader(const ShaderSetup& setup); +/// Generates the GLSL vertex shader program source code for the given VS program +ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); + +/// Generates the GLSL geometry shader program source code for the given GS program +ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); + +/// Generates the GLSL fragment shader program source code for the given FS program +ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 52d569a1b..7425fbe5d 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -471,8 +471,9 @@ void OpenGLState::ApplyTextures() const { const auto& texture_unit = texture_units[i]; auto& cur_state_texture_unit = cur_state.texture_units[i]; textures[i] = texture_unit.texture; - if (cur_state_texture_unit.texture == textures[i]) + if (cur_state_texture_unit.texture == textures[i]) { continue; + } cur_state_texture_unit.texture = textures[i]; if (!has_delta) { first = i; @@ -493,10 +494,11 @@ void OpenGLState::ApplySamplers() const { std::array<GLuint, Maxwell::NumTextureSamplers> samplers; for (std::size_t i = 0; i < std::size(samplers); ++i) { - if (cur_state.texture_units[i].sampler == texture_units[i].sampler) + samplers[i] = texture_units[i].sampler; + if (cur_state.texture_units[i].sampler == texture_units[i].sampler) { continue; + } cur_state.texture_units[i].sampler = texture_units[i].sampler; - samplers[i] = texture_units[i].sampler; if (!has_delta) { first = i; has_delta = true; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index a8833c06e..ed7b5cff0 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -27,8 +27,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedInt: - case Maxwell::VertexAttribute::Type::UnsignedNorm: { - + case Maxwell::VertexAttribute::Type::UnsignedNorm: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -47,16 +46,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + UNREACHABLE(); + return {}; } - - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); - return {}; - } - case Maxwell::VertexAttribute::Type::SignedInt: - case Maxwell::VertexAttribute::Type::SignedNorm: { - + case Maxwell::VertexAttribute::Type::SignedNorm: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -75,14 +71,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + UNREACHABLE(); + return {}; } - - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); - return {}; - } - - case Maxwell::VertexAttribute::Type::Float: { + case Maxwell::VertexAttribute::Type::Float: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: @@ -94,13 +88,16 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32: case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + UNREACHABLE(); + return {}; } + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); + UNREACHABLE(); + return {}; } - } - - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); - UNREACHABLE(); - return {}; } inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { @@ -129,10 +126,13 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; + case Maxwell::PrimitiveTopology::TriangleFan: + return GL_TRIANGLE_FAN; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); + UNREACHABLE(); + return {}; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); - UNREACHABLE(); - return {}; } inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, @@ -186,9 +186,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { } else { return GL_MIRROR_CLAMP_TO_EDGE; } + default: + LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); + return GL_REPEAT; } - LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); - return GL_REPEAT; } inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 34bf26ff2..9fe1e3280 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -62,9 +62,10 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); return vk::SamplerAddressMode::eMirrorClampToEdge; + default: + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; } - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); - return {}; } vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { @@ -225,9 +226,10 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return vk::PrimitiveTopology::eTriangleList; case Maxwell::PrimitiveTopology::TriangleStrip: return vk::PrimitiveTopology::eTriangleStrip; + default: + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; } - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); - return {}; } vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 08b786aad..3edf460df 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -49,9 +49,6 @@ public: return alignment; } - // We do not have to flush this cache as things in it are never modified by us. - void Flush() override {} - private: VAddr cpu_addr{}; std::size_t size{}; @@ -87,6 +84,10 @@ public: return buffer_handle; } +protected: + // We do not have to flush this cache as things in it are never modified by us. + void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + private: void AlignBuffer(std::size_t alignment); diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index ed3178f09..801826d3d 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -7,7 +7,6 @@ #include <unordered_map> #include "common/assert.h" -#include "common/cityhash.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -28,39 +27,20 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> } } -std::size_t SamplerCacheKey::Hash() const { - static_assert(sizeof(raw) % sizeof(u64) == 0); - return static_cast<std::size_t>( - Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { - return raw == rhs.raw; -} - VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} VKSamplerCache::~VKSamplerCache() = default; -vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) { - const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); - auto& sampler = entry->second; - if (is_cache_miss) { - sampler = CreateSampler(tsc); - } - return *sampler; -} - -UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) { - const float max_anisotropy = tsc.GetMaxAnisotropy(); - const bool has_anisotropy = max_anisotropy > 1.0f; +UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + const float max_anisotropy{tsc.GetMaxAnisotropy()}; + const bool has_anisotropy{max_anisotropy > 1.0f}; - const auto border_color = tsc.GetBorderColor(); - const auto vk_border_color = TryConvertBorderColor(border_color); + const auto border_color{tsc.GetBorderColor()}; + const auto vk_border_color{TryConvertBorderColor(border_color)}; UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", border_color[0], border_color[1], border_color[2], border_color[3]); - constexpr bool unnormalized_coords = false; + constexpr bool unnormalized_coords{false}; const vk::SamplerCreateInfo sampler_ci( {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), @@ -73,9 +53,13 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), unnormalized_coords); - const auto& dld = device.GetDispatchLoader(); - const auto dev = device.GetLogical(); + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; return dev.createSamplerUnique(sampler_ci, nullptr, dld); } +vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const { + return *sampler; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index c6394dc87..771b05c73 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -8,49 +8,25 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" +#include "video_core/sampler_cache.h" #include "video_core/textures/texture.h" namespace Vulkan { class VKDevice; -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { - std::size_t Hash() const; - - bool operator==(const SamplerCacheKey& rhs) const; - - bool operator!=(const SamplerCacheKey& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::SamplerCacheKey> { - std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - -class VKSamplerCache { +class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> { public: explicit VKSamplerCache(const VKDevice& device); ~VKSamplerCache(); - vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc); +protected: + UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; -private: - UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc); + vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; +private: const VKDevice& device; - std::unordered_map<SamplerCacheKey, UniqueSampler> cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index e0a6f5e87..a11000f6b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -76,14 +76,10 @@ constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) { /// Returns true if an object has to be treated as precise bool IsPrecise(Operation operand) { - const auto& meta = operand.GetMeta(); - + const auto& meta{operand.GetMeta()}; if (std::holds_alternative<MetaArithmetic>(meta)) { return std::get<MetaArithmetic>(meta).precise; } - if (std::holds_alternative<MetaHalfArithmetic>(meta)) { - return std::get<MetaHalfArithmetic>(meta).precise; - } return false; } @@ -191,8 +187,9 @@ public: for (const auto& cbuf : ir.GetConstantBuffers()) { entries.const_buffers.emplace_back(cbuf.second, cbuf.first); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset); } for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); @@ -225,7 +222,7 @@ private: return current_binding; }; const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size()); - global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size()); + global_buffers_base_binding = Allocate(ir.GetGlobalMemory().size()); samplers_base_binding = Allocate(ir.GetSamplers().size()); ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE, @@ -318,7 +315,6 @@ private: constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", "overflow"}; for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -390,14 +386,15 @@ private: void DeclareGlobalBuffers() { u32 binding = global_buffers_base_binding; - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& entry : ir.GetGlobalMemory()) { + const auto [base, usage] = entry; const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset))); + Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(entry, id); + global_buffers.emplace(base, id); } } @@ -744,6 +741,16 @@ private: return {}; } + Id HClamp(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id HUnpack(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HMergeF32(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1216,6 +1223,8 @@ private: &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, &SPIRVDecompiler::HNegate, + &SPIRVDecompiler::HClamp, + &SPIRVDecompiler::HUnpack, &SPIRVDecompiler::HMergeF32, &SPIRVDecompiler::HMergeH0, &SPIRVDecompiler::HMergeH1, @@ -1258,6 +1267,13 @@ private: &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, + // TODO(Rodrigo): Should these use the OpFUnord* variants? + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, &SPIRVDecompiler::Texture, &SPIRVDecompiler::TextureLod, diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp new file mode 100644 index 000000000..53c7ef12d --- /dev/null +++ b/src/video_core/sampler_cache.cpp @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/cityhash.h" +#include "common/common_types.h" +#include "video_core/sampler_cache.h" + +namespace VideoCommon { + +std::size_t SamplerCacheKey::Hash() const { + static_assert(sizeof(raw) % sizeof(u64) == 0); + return static_cast<std::size_t>( + Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); +} + +bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { + return raw == rhs.raw; +} + +} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h new file mode 100644 index 000000000..cbe3ad071 --- /dev/null +++ b/src/video_core/sampler_cache.h @@ -0,0 +1,60 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <unordered_map> + +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { + std::size_t Hash() const; + + bool operator==(const SamplerCacheKey& rhs) const; + + bool operator!=(const SamplerCacheKey& rhs) const { + return !operator==(rhs); + } +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::SamplerCacheKey> { + std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace VideoCommon { + +template <typename SamplerType, typename SamplerStorageType> +class SamplerCache { +public: + SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { + const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); + auto& sampler = entry->second; + if (is_cache_miss) { + sampler = CreateSampler(tsc); + } + return ToSamplerType(sampler); + } + +protected: + virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; + + virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; + +private: + std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; +}; + +} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index e4c438792..2da595c0d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { // Continue scanning for an exit method. break; } + default: + break; } } return exit_method = ExitMethod::AlwaysReturn; @@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index baee89107..2098c1170 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -9,6 +9,7 @@ namespace VideoCommon::Shader { +using Tegra::Shader::HalfType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -18,48 +19,50 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { if (opcode->get().GetId() == OpCode::Id::HADD2_C || opcode->get().GetId() == OpCode::Id::HADD2_R) { - UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); + if (instr.alu_half.ftz != 0) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } - UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); const bool negate_a = opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; const bool negate_b = opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; - const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); - - // instr.alu_half.type_a + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); + op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); - Node op_b = [&]() { + auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> { switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HMUL2_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::HADD2_R: case OpCode::Id::HMUL2_R: - return GetRegister(instr.gpr20); + return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; default: UNREACHABLE(); - return Immediate(0); + return {HalfType::F32, Immediate(0)}; } }(); - op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); + op_b = UnpackHalfFloat(op_b, type_b); + // redeclaration to avoid a bug in clang with reusing local bindings in lambdas + Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); Node value = [&]() { - MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, meta, op_a, op_b); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt); case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, meta, op_a, op_b); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt); default: UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); return Immediate(0); } }(); + value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); SetRegister(bb, instr.gpr0, value); @@ -67,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index c2164ba50..fbcd35b18 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); + if (instr.alu_half_imm.ftz != 0) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } else { UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); } - UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, - "Half float immediate saturation not implemented"); - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); const Node op_b = UnpackHalfImmediate(instr, true); Node value = [&]() { - MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; switch (opcode->get().GetId()) { case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, meta, op_a, op_b); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, meta, op_a, op_b); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNREACHABLE(); return Immediate(0); } }(); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); + value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); SetRegister(bb, instr.gpr0, value); - return pc; } diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 55a6fbbf2..b5ec9a6f5 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -18,13 +18,29 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: { + case OpCode::Id::I2I_R: + case OpCode::Id::I2I_C: + case OpCode::Id::I2I_IMM: { UNIMPLEMENTED_IF(instr.conversion.selector); + UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.alu.saturate_d); const bool input_signed = instr.conversion.is_input_signed; const bool output_signed = instr.conversion.is_output_signed; - Node value = GetRegister(instr.gpr20); + Node value = [&]() { + switch (opcode->get().GetId()) { + case OpCode::Id::I2I_R: + return GetRegister(instr.gpr20); + case OpCode::Id::I2I_C: + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::I2I_IMM: + return Immediate(instr.alu.GetSignedImm20_20()); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, @@ -38,17 +54,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: { - UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); + case OpCode::Id::I2F_C: + case OpCode::Id::I2F_IMM: { + UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); Node value = [&]() { - if (instr.is_b_gpr) { + switch (opcode->get().GetId()) { + case OpCode::Id::I2F_R: return GetRegister(instr.gpr20); - } else { + case OpCode::Id::I2F_C: return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::I2F_IMM: + return Immediate(instr.alu.GetSignedImm20_20()); + default: + UNREACHABLE(); + return Immediate(0); } }(); const bool input_signed = instr.conversion.is_input_signed; @@ -62,24 +85,31 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: { - UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + case OpCode::Id::F2F_C: + case OpCode::Id::F2F_IMM: { + UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); Node value = [&]() { - if (instr.is_b_gpr) { + switch (opcode->get().GetId()) { + case OpCode::Id::F2F_R: return GetRegister(instr.gpr20); - } else { + case OpCode::Id::F2F_C: return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::F2F_IMM: + return GetImmediate19(instr); + default: + UNREACHABLE(); + return Immediate(0); } }(); value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { - switch (instr.conversion.f2f.rounding) { + switch (instr.conversion.f2f.GetRoundingMode()) { case Tegra::Shader::F2fRoundingOp::None: return value; case Tegra::Shader::F2fRoundingOp::Round: @@ -90,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Operation(OperationCode::FCeil, PRECISE, value); case Tegra::Shader::F2fRoundingOp::Trunc: return Operation(OperationCode::FTrunc, PRECISE, value); + default: + UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", + static_cast<u32>(instr.conversion.f2f.rounding.Value())); + return Immediate(0); } - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - static_cast<u32>(instr.conversion.f2f.rounding.Value())); - return Immediate(0); }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); @@ -102,15 +133,22 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: { + case OpCode::Id::F2I_C: + case OpCode::Id::F2I_IMM: { UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2I is not implemented"); Node value = [&]() { - if (instr.is_b_gpr) { + switch (opcode->get().GetId()) { + case OpCode::Id::F2I_R: return GetRegister(instr.gpr20); - } else { + case OpCode::Id::F2I_C: return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::F2I_IMM: + return GetImmediate19(instr); + default: + UNREACHABLE(); + return Immediate(0); } }(); @@ -134,7 +172,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { }(); const bool is_signed = instr.conversion.is_output_signed; value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed); + value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); SetRegister(bb, instr.gpr0, value); break; diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 748368555..1dd94bf9d 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hset2.ftz != 0); + if (instr.hset2.ftz != 0) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } + + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); + op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); - // instr.hset2.type_a - // instr.hset2.type_b - Node op_a = GetRegister(instr.gpr8); Node op_b = [&]() { switch (opcode->get().GetId()) { case OpCode::Id::HSET2_R: @@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); - - op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); + op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; - const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); + const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index e68512692..6e59eb650 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - const Node op_b = [&]() { + Node op_b = [&]() { switch (opcode->get().GetId()) { case OpCode::Id::HSETP2_R: return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, @@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); + op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); // We can't use the constant predicate as destination. ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); @@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const OperationCode pair_combiner = instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; - const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); + const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); const Node first_pred = Operation(pair_combiner, comparison); // Set the primary predicate to the result of Predicate OP SecondPredicate diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 7a07c5ec6..a425f9eb7 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { } constexpr auto identity = HalfType::H0_H1; - - const HalfType type_a = instr.hfma2.type_a; - const Node op_a = GetRegister(instr.gpr8); - bool neg_b{}, neg_c{}; auto [saturate, type_b, op_b, type_c, op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { @@ -38,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { case OpCode::Id::HFMA2_CR: neg_b = instr.hfma2.negate_b; neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_b, + return {instr.hfma2.saturate, HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; case OpCode::Id::HFMA2_RC: neg_b = instr.hfma2.negate_b; neg_c = instr.hfma2.negate_c; return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - instr.hfma2.type_b, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; + HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::HFMA2_RR: neg_b = instr.hfma2.rr.negate_b; neg_c = instr.hfma2.rr.negate_c; @@ -60,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { return {false, identity, Immediate(0), identity, Immediate(0)}; } }(); - UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); - op_b = GetOperandAbsNegHalf(op_b, false, neg_b); - op_c = GetOperandAbsNegHalf(op_c, false, neg_c); + const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); + op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); + op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; - Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); + Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); + value = GetSaturatedHalfFloat(value, saturate); value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); SetRegister(bb, instr.gpr0, value); @@ -74,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ea1092db1 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -18,6 +19,23 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} +} // namespace + u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LD_L: { - UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", - static_cast<u32>(instr.ld_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", + static_cast<u64>(instr.ld_l.unknown.Value())); const auto GetLmem = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LDG: { - const u32 count = [&]() { - switch (instr.ldg.type) { - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; - default: - UNIMPLEMENTED_MSG("Unimplemented LDG size!"); - return 1; - } - }(); - - const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - - bb.push_back(Comment( - fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); - - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; - used_global_memory_bases.insert(descriptor); - - const Node immediate_offset = - Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); - const Node base_real_address = - Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + const u32 count = GetUniformTypeElementsCount(instr.ldg.type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); SetTemporal(bb, i, gmem); @@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::STG: { + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.stg.immediate_offset.Value()), true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(instr.stg.type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: { - UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", - static_cast<u32>(instr.st_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", + static_cast<u64>(instr.st_l.cache_management.Value())); const auto GetLmemAddr = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write) { + const Node base_address{ + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf = std::get_if<CbufNode>(base_address); + ASSERT(cbuf != nullptr); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + + bb.push_back( + Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + + const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + auto& usage = entry->second; + if (is_write) { + usage.is_written = true; + } else { + usage.is_read = true; + } + + const auto real_address = + Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); + + return {real_address, base_address, descriptor}; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a775b402b..5b033126d 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - + bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { @@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); + break; + } + case OpCode::Id::TEX_B: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex_b.texture_type}; + const bool is_array = instr.tex_b.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); + const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex_b.GetTextureProcessMode(); + WriteTexInstructionFloat(bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, + is_array, is_aoffi, {instr.gpr20})); break; } case OpCode::Id::TEXS: { @@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexsInstructionFloat(bb, instr, values); break; } + case OpCode::Id::TXQ_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); @@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless + ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, + false) + : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); u32 indexer = 0; switch (instr.txq.query_type) { @@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + Operation(OperationCode::TextureQueryDimensions, meta, + GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { @@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TMML_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TMML: { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); @@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = is_bindless + ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) + : GetSampler(instr.sampler, texture_type, is_array, false); std::vector<Node> coords; @@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { coords.push_back(GetRegister(instr.gpr8.Value() + 1)); texture_type = TextureType::Texture2D; } - + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } auto params = coords; MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, element, value); + SetTemporal(bb, indexer++, value); } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } - break; } case OpCode::Id::TLDS: { @@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, + bool is_array, bool is_shadow) { + const Node sampler_register = GetRegister(reg); + const Node base_sampler = + TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); + const auto cbuf = std::get_if<CbufNode>(base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + const auto cbuf_index = cbuf->GetIndex(); + const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); + + // If this sampler has already been used, return the existing mapping. + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { u32 dest_elem = 0; for (u32 elem = 0; elem < 4; ++elem) { @@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector<Node> coords, Node array, Node depth_compare, u32 bias_offset, - std::vector<Node> aoffi) { + std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; + const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = is_bindless + ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) + : GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. + // LOD selection (either via bias or explicit textureLod) not + // supported in GL for sampler2DArrayShadow and + // samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = Immediate(0.0f); break; case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers + // If present, lod or bias are always stored in the register + // indexed by the gpr20 field with an offset depending on the + // usage of the other registers bias = GetRegister(instr.gpr20.Value() + bias_offset); break; case TextureProcessMode::LL: @@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi) { + bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { const bool lod_bias_enabled{ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + const bool is_bindless = bindless_reg.has_value(); + u64 parameter_register = instr.gpr20.Value(); + if (is_bindless) { + ++parameter_register; + } + + const u32 bias_lod_offset = (is_bindless ? 1 : 0); if (lod_bias_enabled) { ++parameter_register; } @@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, + aoffi, bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -459,14 +533,13 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, + {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, bool is_array, bool is_aoffi) { const std::size_t coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); // If enabled arrays index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index db15c0718..04a776398 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { instr.xmad.mode, Immediate(static_cast<u32>(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; + default: + UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); + return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; } - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; }(); op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index ac5112d78..196235e5d 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) + : program_code{program_code}, main_offset{main_offset} { + Decode(); +} + +ShaderIR::~ShaderIR() = default; + Node ShaderIR::StoreNode(NodeData&& node_data) { auto store = std::make_unique<NodeData>(node_data); const Node node = store.get(); @@ -189,7 +196,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); + return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); +} + +Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { + return Operation(OperationCode::HUnpack, type, value); } Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { @@ -209,17 +220,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { if (absolute) { - value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value); + value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); } if (negate) { - value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), + value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), GetPredicate(true)); } return value; } +Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { + if (!saturate) { + return value; + } + const Node positive_zero = Immediate(std::copysignf(0, 1)); + const Node positive_one = Immediate(1.0f); + return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); +} + Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { + const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::LogicalFLessThan}, {PredCondition::Equal, OperationCode::LogicalFEqual}, {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, @@ -255,7 +275,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { - static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { + const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::LogicalILessThan}, {PredCondition::Equal, OperationCode::LogicalIEqual}, {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, @@ -283,40 +303,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si return predicate; } -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, - const MetaHalfArithmetic& meta, Node op_a, Node op_b) { - - UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || - condition == PredCondition::NotEqualWithNan || - condition == PredCondition::LessEqualWithNan || - condition == PredCondition::GreaterThanWithNan || - condition == PredCondition::GreaterEqualWithNan, - "Unimplemented NaN comparison for half floats"); - - static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { +Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, + Node op_b) { + const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::Logical2HLessThan}, {PredCondition::Equal, OperationCode::Logical2HEqual}, {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan}, - {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}}; + {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, + {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, + {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, + {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, + {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; const auto comparison{PredicateComparisonTable.find(condition)}; UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), "Unknown predicate comparison operation"); - const Node predicate = Operation(comparison->second, meta, op_a, op_b); + const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); return predicate; } OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { + const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { {PredOperation::And, OperationCode::LogicalAnd}, {PredOperation::Or, OperationCode::LogicalOr}, {PredOperation::Xor, OperationCode::LogicalXor}, @@ -434,11 +446,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { return OperationCode::LogicalUGreaterEqual; case OperationCode::INegate: UNREACHABLE_MSG("Can't negate an unsigned integer"); + return {}; case OperationCode::IAbsolute: UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); + return {}; + default: + UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); + return {}; } - UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); - return {}; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 4888998d3..e4253fdb3 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -109,11 +109,13 @@ enum class OperationCode { UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint - HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 HAbsolute, /// (f16vec2 a) -> f16vec2 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 HMergeF32, /// (f16vec2 src) -> float HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 @@ -150,12 +152,18 @@ enum class OperationCode { LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Texture, /// (MetaTexture, float[N] coords) -> float4 TextureLod, /// (MetaTexture, float[N] coords) -> float4 @@ -196,9 +204,23 @@ enum class ExitMethod { class Sampler { public: + // Use this constructor for bounded Samplers explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + // Use this constructor for bindless Samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} + + // Use this only for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} std::size_t GetOffset() const { return offset; @@ -220,9 +242,18 @@ public: return is_shadow; } + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + bool operator<(const Sampler& rhs) const { - return std::tie(offset, index, type, is_array, is_shadow) < - std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); + return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < + std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, + rhs.is_bindless); } private: @@ -231,8 +262,9 @@ private: std::size_t offset{}; std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; class ConstBuffer { @@ -276,15 +308,13 @@ struct GlobalMemoryBase { } }; -struct MetaArithmetic { - bool precise{}; +struct GlobalMemoryUsage { + bool is_read{}; + bool is_written{}; }; -struct MetaHalfArithmetic { +struct MetaArithmetic { bool precise{}; - std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1, - Tegra::Shader::HalfType::H0_H1, - Tegra::Shader::HalfType::H0_H1}; }; struct MetaTexture { @@ -300,39 +330,29 @@ struct MetaTexture { constexpr MetaArithmetic PRECISE = {true}; constexpr MetaArithmetic NO_PRECISE = {false}; -constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; -using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; +using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; /// Holds any kind of operation that can be done in the IR class OperationNode final { public: - template <typename... T> - explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} + explicit OperationNode(OperationCode code) : code{code} {} - template <typename... T> - explicit constexpr OperationNode(OperationCode code, Meta&& meta) - : code{code}, meta{std::move(meta)} {} + explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {} template <typename... T> - explicit constexpr OperationNode(OperationCode code, const T*... operands) + explicit OperationNode(OperationCode code, const T*... operands) : OperationNode(code, {}, operands...) {} template <typename... T> - explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) - : code{code}, meta{std::move(meta)} { - - auto operands_list = {operands_...}; - for (auto& operand : operands_list) { - operands.push_back(operand); - } - } + explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_) + : code{code}, meta{std::move(meta)}, operands{operands_...} {} explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) : code{code}, meta{meta}, operands{std::move(operands)} {} explicit OperationNode(OperationCode code, std::vector<Node>&& operands) - : code{code}, meta{}, operands{std::move(operands)} {} + : code{code}, operands{std::move(operands)} {} OperationCode GetCode() const { return code; @@ -538,11 +558,8 @@ private: class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset) - : program_code{program_code}, main_offset{main_offset} { - - Decode(); - } + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); + ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { return basic_blocks; @@ -578,8 +595,8 @@ public: return used_clip_distances; } - const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { - return used_global_memory_bases; + const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { + return used_global_memory; } std::size_t GetLength() const { @@ -706,10 +723,14 @@ private: /// Unpacks a half immediate from an instruction Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); + /// Unpacks a binary value into a half float pair with a type format + Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); /// Merges a half pair into another value Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); /// Conditionally absolute/negated half float pair. Absolute is applied first Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); + /// Conditionally saturates a half float pair + Node GetSaturatedHalfFloat(Node value, bool saturate = true); /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); @@ -717,8 +738,7 @@ private: Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, Node op_a, Node op_b); /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, - const MetaHalfArithmetic& meta, Node op_a, Node op_b); + Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate combiner operation OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); @@ -730,6 +750,11 @@ private: const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + // Accesses a texture sampler for a bindless texture. + const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, + Tegra::Shader::TextureType type, bool is_array, + bool is_shadow); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -743,7 +768,8 @@ private: Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi); + bool is_array, bool is_aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, @@ -763,7 +789,8 @@ private: Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); + Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -775,11 +802,17 @@ private: void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc); - Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); + Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; + + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, + s64 cursor) const; - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write); template <typename... T> Node Operation(OperationCode code, const T*... operands) { @@ -791,12 +824,10 @@ private: return StoreNode(OperationNode(code, std::move(meta), operands...)); } - template <typename... T> Node Operation(OperationCode code, std::vector<Node>&& operands) { return StoreNode(OperationNode(code, std::move(operands))); } - template <typename... T> Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); } @@ -834,7 +865,7 @@ private: std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::set<GlobalMemoryBase> used_global_memory_bases; + std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 4505667ff..19ede1eb9 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, for (; cursor >= 0; --cursor) { const Node node = code.at(cursor); if (const auto operation = std::get_if<OperationNode>(node)) { - if (operation->GetCode() == operation_code) + if (operation->GetCode() == operation_code) { return {node, cursor}; + } } if (const auto conditional = std::get_if<ConditionalNode>(node)) { const auto& conditional_code = conditional->GetCode(); const auto [found, internal_cursor] = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); - if (found) + if (found) { return {found, cursor}; + } } } return {}; } } // namespace -Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { +Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; @@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } -std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { +std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand const auto [found, found_cursor] = @@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, } std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) { + s64 cursor) const { for (; cursor >= 0; --cursor) { const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); if (!found_node) { diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 3b022a456..6384fa8d2 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -178,39 +178,44 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::ABGR8S; case Tegra::Texture::ComponentType::UINT: return PixelFormat::ABGR8UI; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::B5G6R5: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: return PixelFormat::B5G6R5U; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::A2B10G10R10: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: return PixelFormat::A2B10G10R10U; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::A1B5G5R5: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: return PixelFormat::A1B5G5R5U; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::R8: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: return PixelFormat::R8U; case Tegra::Texture::ComponentType::UINT: return PixelFormat::R8UI; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::G8R8: // TextureFormat::G8R8 is actually ordered red then green, as such we can use // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath @@ -220,50 +225,55 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::RG8U; case Tegra::Texture::ComponentType::SNORM: return PixelFormat::RG8S; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::R16_G16_B16_A16: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: return PixelFormat::RGBA16U; case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::RGBA16F; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::BF10GF11RF11: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::R11FG11FB10F; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::RGBA32F; case Tegra::Texture::ComponentType::UINT: return PixelFormat::RGBA32UI; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::R32_G32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::RG32F; case Tegra::Texture::ComponentType::UINT: return PixelFormat::RG32UI; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::R32_G32_B32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::RGB32F; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::R16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -276,18 +286,20 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::R16UI; case Tegra::Texture::ComponentType::SINT: return PixelFormat::R16I; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::R32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::R32F; case Tegra::Texture::ComponentType::UINT: return PixelFormat::R32UI; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::ZF32: return PixelFormat::Z32F; case Tegra::Texture::TextureFormat::Z16: @@ -310,9 +322,10 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::DXN2UNORM; case Tegra::Texture::ComponentType::SNORM: return PixelFormat::DXN2SNORM; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; case Tegra::Texture::TextureFormat::BC7U: return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U; case Tegra::Texture::TextureFormat::BC6H_UF16: @@ -343,15 +356,17 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::RG16UI; case Tegra::Texture::ComponentType::SINT: return PixelFormat::RG16I; + default: + break; } - LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); - UNREACHABLE(); + break; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format), - static_cast<u32>(component_type)); - UNREACHABLE(); - return PixelFormat::ABGR8U; + break; } + LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format), + static_cast<u32>(component_type)); + UNREACHABLE(); + return PixelFormat::ABGR8U; } ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { @@ -513,8 +528,9 @@ bool IsFormatBCn(PixelFormat format) { case PixelFormat::DXT45_SRGB: case PixelFormat::BC7U_SRGB: return true; + default: + return false; } - return false; } } // namespace VideoCore::Surface diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index b508d64e9..a9b8f69af 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -25,8 +25,8 @@ class InputBitStream { public: - explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0) - : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} + explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) + : m_CurByte(ptr), m_NextBit(start_offset % 8) {} ~InputBitStream() = default; @@ -55,12 +55,9 @@ public: } private: - const int m_NumBits; const unsigned char* m_CurByte; int m_NextBit = 0; int m_BitsRead = 0; - - bool done = false; }; class OutputBitStream { @@ -114,7 +111,6 @@ private: const int m_NumBits; unsigned char* m_CurByte; int m_NextBit = 0; - int m_BitsRead = 0; bool done = false; }; @@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC { std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height) { uint32_t blockIdx = 0; + std::size_t depth_offset = 0; std::vector<uint8_t> outData(height * width * depth * 4); for (uint32_t k = 0; k < depth; k++) { for (uint32_t j = 0; j < height; j += block_height) { @@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he uint32_t decompWidth = std::min(block_width, width - i); uint32_t decompHeight = std::min(block_height, height - j); - uint8_t* outRow = outData.data() + (j * width + i) * 4; + uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; for (uint32_t jj = 0; jj < decompHeight; jj++) { memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); } @@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he blockIdx++; } } + depth_offset += height * width * 4; } return outData; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 995d0e068..217805386 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 } } +void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, + const u32 block_height, const std::size_t copy_size, const u8* source_data, + u8* swizzle_data) { + const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; + std::size_t count = 0; + for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { + const std::size_t gob_address_y = + (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + + ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; + const auto& table = legacy_swizzle_table[y % gob_size_y]; + for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { + const std::size_t gob_address = + gob_address_y + (x / gob_size_x) * gob_size * block_height; + const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; + const u8* source_line = source_data + count; + u8* dest_addr = swizzle_data + swizzled_offset; + count++; + + std::memcpy(dest_addr, source_line, 1); + } + } +} + std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, u32 height) { std::vector<u8> rgba_data; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e078fa274..e072d8401 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y); +void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, + const u32 block_height, const std::size_t copy_size, const u8* source_data, + u8* swizzle_data); + } // namespace Tegra::Texture diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index cb82ecf3f..60cda0ca3 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -5,6 +5,8 @@ #include <memory> #include "core/core.h" #include "core/settings.h" +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/video_core.h" @@ -16,6 +18,14 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); } +std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { + if (Settings::values.use_asynchronous_gpu_emulation) { + return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer()); + } + + return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer()); +} + u16 GetResolutionScaleFactor(const RendererBase& renderer) { return static_cast<u16>( Settings::values.resolution_factor diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 3c583f195..b8e0ac372 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -14,6 +14,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace Tegra { +class GPU; +} + namespace VideoCore { class RendererBase; @@ -27,6 +31,9 @@ class RendererBase; std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, Core::System& system); +/// Creates an emulated GPU instance using the given system context. +std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system); + u16 GetResolutionScaleFactor(const RendererBase& renderer); } // namespace VideoCore diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 2eb86d6e5..7e883991a 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -7,6 +7,8 @@ add_executable(yuzu Info.plist about_dialog.cpp about_dialog.h + applets/error.cpp + applets/error.h applets/profile_select.cpp applets/profile_select.h applets/software_keyboard.cpp @@ -80,8 +82,6 @@ add_executable(yuzu util/limitable_input_dialog.h util/sequence_dialog/sequence_dialog.cpp util/sequence_dialog/sequence_dialog.h - util/spinbox.cpp - util/spinbox.h util/util.cpp util/util.h compatdb.cpp @@ -151,6 +151,12 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core) target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) +target_compile_definitions(yuzu PRIVATE + # Use QStringBuilder for string concatenation to reduce + # the overall number of temporary strings created. + -DQT_USE_QSTRINGBUILDER +) + if (YUZU_ENABLE_COMPATIBILITY_REPORTING) target_compile_definitions(yuzu PRIVATE -DYUZU_ENABLE_COMPATIBILITY_REPORTING) endif() diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp index 3efa65a38..d39b3f07a 100644 --- a/src/yuzu/about_dialog.cpp +++ b/src/yuzu/about_dialog.cpp @@ -9,10 +9,10 @@ AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { ui->setupUi(this); - ui->labelLogo->setPixmap(QIcon::fromTheme("yuzu").pixmap(200)); - ui->labelBuildInfo->setText( - ui->labelBuildInfo->text().arg(Common::g_build_fullname, Common::g_scm_branch, - Common::g_scm_desc, QString(Common::g_build_date).left(10))); + ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200)); + ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg( + QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch), + QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10))); } AboutDialog::~AboutDialog() = default; diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp new file mode 100644 index 000000000..106dde9e2 --- /dev/null +++ b/src/yuzu/applets/error.cpp @@ -0,0 +1,59 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <QDateTime> +#include "core/hle/lock.h" +#include "yuzu/applets/error.h" +#include "yuzu/main.h" + +QtErrorDisplay::QtErrorDisplay(GMainWindow& parent) { + connect(this, &QtErrorDisplay::MainWindowDisplayError, &parent, + &GMainWindow::ErrorDisplayDisplayError, Qt::QueuedConnection); + connect(&parent, &GMainWindow::ErrorDisplayFinished, this, + &QtErrorDisplay::MainWindowFinishedError, Qt::DirectConnection); +} + +QtErrorDisplay::~QtErrorDisplay() = default; + +void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { + this->callback = std::move(finished); + emit MainWindowDisplayError( + tr("An error has occured.\nPlease try again or contact the developer of the " + "software.\n\nError Code: %1-%2 (0x%3)") + .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) + .arg(error.description, 4, 10, QChar::fromLatin1('0')) + .arg(error.raw, 8, 16, QChar::fromLatin1('0'))); +} + +void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, + std::function<void()> finished) const { + this->callback = std::move(finished); + emit MainWindowDisplayError( + tr("An error occured on %1 at %2.\nPlease try again or contact the " + "developer of the software.\n\nError Code: %3-%4 (0x%5)") + .arg(QDateTime::fromSecsSinceEpoch(time.count()).toString("dddd, MMMM d, yyyy")) + .arg(QDateTime::fromSecsSinceEpoch(time.count()).toString("h:mm:ss A")) + .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) + .arg(error.description, 4, 10, QChar::fromLatin1('0')) + .arg(error.raw, 8, 16, QChar::fromLatin1('0'))); +} + +void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_text, + std::string fullscreen_text, + std::function<void()> finished) const { + this->callback = std::move(finished); + emit MainWindowDisplayError( + tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") + .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) + .arg(error.description, 4, 10, QChar::fromLatin1('0')) + .arg(error.raw, 8, 16, QChar::fromLatin1('0')) + .arg(QString::fromStdString(dialog_text)) + .arg(QString::fromStdString(fullscreen_text))); +} + +void QtErrorDisplay::MainWindowFinishedError() { + // Acquire the HLE mutex + std::lock_guard lock{HLE::g_hle_lock}; + callback(); +} diff --git a/src/yuzu/applets/error.h b/src/yuzu/applets/error.h new file mode 100644 index 000000000..b0932d895 --- /dev/null +++ b/src/yuzu/applets/error.h @@ -0,0 +1,33 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <QObject> + +#include "core/frontend/applets/error.h" + +class GMainWindow; + +class QtErrorDisplay final : public QObject, public Core::Frontend::ErrorApplet { + Q_OBJECT + +public: + explicit QtErrorDisplay(GMainWindow& parent); + ~QtErrorDisplay() override; + + void ShowError(ResultCode error, std::function<void()> finished) const override; + void ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, + std::function<void()> finished) const override; + void ShowCustomErrorText(ResultCode error, std::string dialog_text, std::string fullscreen_text, + std::function<void()> finished) const override; + +signals: + void MainWindowDisplayError(QString error) const; + +private: + void MainWindowFinishedError(); + + mutable std::function<void()> callback; +}; diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp index 743b24d76..7fbc9deeb 100644 --- a/src/yuzu/applets/profile_select.cpp +++ b/src/yuzu/applets/profile_select.cpp @@ -84,10 +84,10 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent) tree_view->setContextMenuPolicy(Qt::NoContextMenu); item_model->insertColumns(0, 1); - item_model->setHeaderData(0, Qt::Horizontal, "Users"); + item_model->setHeaderData(0, Qt::Horizontal, tr("Users")); // We must register all custom types with the Qt Automoc system so that we are able to use it - // with signals/slots. In this case, QList falls under the umbrells of custom types. + // with signals/slots. In this case, QList falls under the umbrella of custom types. qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>"); layout->setContentsMargins(0, 0, 0, 0); diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index c29f2d2dc..810954b36 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -91,8 +91,8 @@ void EmuThread::run() { class GGLContext : public Core::Frontend::GraphicsContext { public: - explicit GGLContext(QOpenGLContext* shared_context) : surface() { - context = std::make_unique<QOpenGLContext>(shared_context); + explicit GGLContext(QOpenGLContext* shared_context) + : context{std::make_unique<QOpenGLContext>(shared_context)} { surface.setFormat(shared_context->format()); surface.create(); } @@ -186,8 +186,7 @@ private: }; GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread) - : QWidget(parent), child(nullptr), context(nullptr), emu_thread(emu_thread) { - + : QWidget(parent), emu_thread(emu_thread) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc)); setAttribute(Qt::WA_AcceptTouchEvents); @@ -378,7 +377,12 @@ void GRenderWindow::InitRenderTarget() { // WA_DontShowOnScreen, WA_DeleteOnClose QSurfaceFormat fmt; fmt.setVersion(4, 3); - fmt.setProfile(QSurfaceFormat::CoreProfile); + if (Settings::values.use_compatibility_profile) { + fmt.setProfile(QSurfaceFormat::CompatibilityProfile); + fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); + } else { + fmt.setProfile(QSurfaceFormat::CoreProfile); + } // TODO: expose a setting for buffer value (ie default/single/double/triple) fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); shared_context = std::make_unique<QOpenGLContext>(); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 9608b959f..3df33aca1 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -10,7 +10,6 @@ #include <QImage> #include <QThread> #include <QWidget> -#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index c8b0a5ec0..5477f050c 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp @@ -58,7 +58,7 @@ void CompatDB::Submit() { button(NextButton)->setEnabled(false); button(NextButton)->setText(tr("Submitting")); - button(QWizard::CancelButton)->setVisible(false); + button(CancelButton)->setVisible(false); testcase_watcher.setFuture(QtConcurrent::run( [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); @@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() { tr("An error occured while sending the Testcase")); button(NextButton)->setEnabled(true); button(NextButton)->setText(tr("Next")); - button(QWizard::CancelButton)->setVisible(true); + button(CancelButton)->setVisible(true); } else { next(); // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a // workaround - button(QWizard::CancelButton)->setVisible(false); + button(CancelButton)->setVisible(false); } } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 8725a78dc..6c6f047d8 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -389,6 +389,8 @@ void Config::ReadValues() { Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat(); Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool(); Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt(); + Settings::values.use_compatibility_profile = + ReadSetting("use_compatibility_profile", true).toBool(); Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool(); Settings::values.use_accurate_gpu_emulation = ReadSetting("use_accurate_gpu_emulation", false).toBool(); @@ -661,6 +663,7 @@ void Config::SaveValues() { WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0); WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true); WriteSetting("frame_limit", Settings::values.frame_limit, 100); + WriteSetting("use_compatibility_profile", Settings::values.use_compatibility_profile, true); WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true); WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false); WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation, diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index 51bd1f121..32c05b797 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -12,13 +12,17 @@ #include "yuzu/hotkeys.h" ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry) - : QDialog(parent), registry(registry), ui(new Ui::ConfigureDialog) { + : QDialog(parent), ui(new Ui::ConfigureDialog), registry(registry) { ui->setupUi(this); ui->hotkeysTab->Populate(registry); this->setConfiguration(); this->PopulateSelectionList(); + + setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint); + connect(ui->selectorList, &QListWidget::itemSelectionChanged, this, &ConfigureDialog::UpdateVisibleTabs); + adjustSize(); ui->selectorList->setCurrentRow(0); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 0a9883d37..08ea41b0f 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -69,15 +69,20 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::setConfiguration() { + const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); ui->frame_limit->setValue(Settings::values.frame_limit); + ui->use_compatibility_profile->setEnabled(runtime_lock); + ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile); + ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); - ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); - ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->force_30fps_mode->setEnabled(runtime_lock); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue)); @@ -88,6 +93,7 @@ void ConfigureGraphics::applyConfiguration() { ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); Settings::values.frame_limit = ui->frame_limit->value(); + Settings::values.use_compatibility_profile = ui->use_compatibility_profile->isChecked(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_asynchronous_gpu_emulation = diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 15ab18ecd..0f6f6c003 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -50,6 +50,13 @@ </layout> </item> <item> + <widget class="QCheckBox" name="use_compatibility_profile"> + <property name="text"> + <string>Use OpenGL compatibility profile</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> <string>Use disk shader cache</string> diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index bfb562535..a7a8752e5 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp @@ -66,20 +66,21 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) { } void ConfigureHotkeys::Configure(QModelIndex index) { - if (index.parent() == QModelIndex()) + if (!index.parent().isValid()) { return; + } index = index.sibling(index.row(), 1); - auto* model = ui->hotkey_list->model(); - auto previous_key = model->data(index); - - auto* hotkey_dialog = new SequenceDialog; - int return_code = hotkey_dialog->exec(); + auto* const model = ui->hotkey_list->model(); + const auto previous_key = model->data(index); - auto key_sequence = hotkey_dialog->GetSequence(); + SequenceDialog hotkey_dialog{this}; - if (return_code == QDialog::Rejected || key_sequence.isEmpty()) + const int return_code = hotkey_dialog.exec(); + const auto key_sequence = hotkey_dialog.GetSequence(); + if (return_code == QDialog::Rejected || key_sequence.isEmpty()) { return; + } if (IsUsedKey(key_sequence) && key_sequence != QKeySequence(previous_key.toString())) { QMessageBox::critical(this, tr("Error in inputted key"), @@ -90,7 +91,7 @@ void ConfigureHotkeys::Configure(QModelIndex index) { } } -bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) { +bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) const { return GetUsedKeyList().contains(key_sequence); } diff --git a/src/yuzu/configuration/configure_hotkeys.h b/src/yuzu/configuration/configure_hotkeys.h index cd203aad6..73fb8a175 100644 --- a/src/yuzu/configuration/configure_hotkeys.h +++ b/src/yuzu/configuration/configure_hotkeys.h @@ -6,7 +6,6 @@ #include <memory> #include <QWidget> -#include "core/settings.h" namespace Ui { class ConfigureHotkeys; @@ -39,7 +38,7 @@ signals: private: void Configure(QModelIndex index); - bool IsUsedKey(QKeySequence key_sequence); + bool IsUsedKey(QKeySequence key_sequence) const; QList<QKeySequence> GetUsedKeyList() const; std::unique_ptr<Ui::ConfigureHotkeys> ui; diff --git a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp index 67ed0ba6d..1c80082a4 100644 --- a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp +++ b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp @@ -135,7 +135,7 @@ GraphicsBreakPointsWidget::GraphicsBreakPointsWidget( std::shared_ptr<Tegra::DebugContext> debug_context, QWidget* parent) : QDockWidget(tr("Maxwell Breakpoints"), parent), Tegra::DebugContext::BreakPointObserver( debug_context) { - setObjectName("TegraBreakPointsWidget"); + setObjectName(QStringLiteral("TegraBreakPointsWidget")); status_text = new QLabel(tr("Emulation running")); resume_button = new QPushButton(tr("Resume")); diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index 86e03e46d..f594ef076 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -47,7 +47,7 @@ private: #endif MicroProfileDialog::MicroProfileDialog(QWidget* parent) : QWidget(parent, Qt::Dialog) { - setObjectName("MicroProfile"); + setObjectName(QStringLiteral("MicroProfile")); setWindowTitle(tr("MicroProfile")); resize(1000, 600); // Remove the "?" button from the titlebar and enable the maximize button @@ -191,7 +191,7 @@ void MicroProfileDrawText(int x, int y, u32 hex_color, const char* text, u32 tex for (u32 i = 0; i < text_length; ++i) { // Position the text baseline 1 pixel above the bottom of the text cell, this gives nice // vertical alignment of text for a wide range of tested fonts. - mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QChar(text[i])); + mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QString{QLatin1Char{text[i]}}); x += MICROPROFILE_TEXT_WIDTH + 1; } } diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 593bb681f..cd8180f8b 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -91,19 +91,19 @@ WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTa WaitTreeMutexInfo::~WaitTreeMutexInfo() = default; QString WaitTreeMutexInfo::GetText() const { - return tr("waiting for mutex 0x%1").arg(mutex_address, 16, 16, QLatin1Char('0')); + return tr("waiting for mutex 0x%1").arg(mutex_address, 16, 16, QLatin1Char{'0'}); } std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeMutexInfo::GetChildren() const { - std::vector<std::unique_ptr<WaitTreeItem>> list; - - bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0; + const bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0; + std::vector<std::unique_ptr<WaitTreeItem>> list; list.push_back(std::make_unique<WaitTreeText>(tr("has waiters: %1").arg(has_waiters))); list.push_back(std::make_unique<WaitTreeText>( - tr("owner handle: 0x%1").arg(owner_handle, 8, 16, QLatin1Char('0')))); - if (owner != nullptr) + tr("owner handle: 0x%1").arg(owner_handle, 8, 16, QLatin1Char{'0'}))); + if (owner != nullptr) { list.push_back(std::make_unique<WaitTreeThread>(*owner)); + } return list; } @@ -121,11 +121,14 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister]; while (base_pointer != 0) { - u64 lr = Memory::Read64(base_pointer + sizeof(u64)); - if (lr == 0) + const u64 lr = Memory::Read64(base_pointer + sizeof(u64)); + if (lr == 0) { break; - list.push_back( - std::make_unique<WaitTreeText>(tr("0x%1").arg(lr - sizeof(u32), 16, 16, QChar('0')))); + } + + list.push_back(std::make_unique<WaitTreeText>( + tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'}))); + base_pointer = Memory::Read64(base_pointer); } @@ -174,10 +177,10 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() con QString WaitTreeWaitObject::GetResetTypeQString(Kernel::ResetType reset_type) { switch (reset_type) { - case Kernel::ResetType::OneShot: - return tr("one shot"); - case Kernel::ResetType::Sticky: - return tr("sticky"); + case Kernel::ResetType::Automatic: + return tr("automatic reset"); + case Kernel::ResetType::Manual: + return tr("manual reset"); } UNREACHABLE(); return {}; @@ -227,8 +230,7 @@ QString WaitTreeThread::GetText() const { case Kernel::ThreadStatus::WaitIPC: status = tr("waiting for IPC reply"); break; - case Kernel::ThreadStatus::WaitSynchAll: - case Kernel::ThreadStatus::WaitSynchAny: + case Kernel::ThreadStatus::WaitSynch: status = tr("waiting for objects"); break; case Kernel::ThreadStatus::WaitMutex: @@ -250,9 +252,9 @@ QString WaitTreeThread::GetText() const { const auto& context = thread.GetContext(); const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") - .arg(context.pc, 8, 16, QLatin1Char('0')) - .arg(context.cpu_registers[30], 8, 16, QLatin1Char('0')); - return WaitTreeWaitObject::GetText() + pc_info + " (" + status + ") "; + .arg(context.pc, 8, 16, QLatin1Char{'0'}) + .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); + return QStringLiteral("%1%2 (%3) ").arg(WaitTreeWaitObject::GetText(), pc_info, status); } QColor WaitTreeThread::GetColor() const { @@ -269,8 +271,7 @@ QColor WaitTreeThread::GetColor() const { return QColor(Qt::GlobalColor::darkRed); case Kernel::ThreadStatus::WaitSleep: return QColor(Qt::GlobalColor::darkYellow); - case Kernel::ThreadStatus::WaitSynchAll: - case Kernel::ThreadStatus::WaitSynchAny: + case Kernel::ThreadStatus::WaitSynch: case Kernel::ThreadStatus::WaitMutex: case Kernel::ThreadStatus::WaitCondVar: case Kernel::ThreadStatus::WaitArb: @@ -325,10 +326,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); } - if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynchAny || - thread.GetStatus() == Kernel::ThreadStatus::WaitSynchAll) { + if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) { list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjects(), - thread.IsSleepingOnWaitAll())); + thread.IsSleepingOnWait())); } list.push_back(std::make_unique<WaitTreeCallstack>(thread)); @@ -427,7 +427,7 @@ void WaitTreeModel::InitItems() { } WaitTreeWidget::WaitTreeWidget(QWidget* parent) : QDockWidget(tr("Wait Tree"), parent) { - setObjectName("WaitTreeWidget"); + setObjectName(QStringLiteral("WaitTreeWidget")); view = new QTreeView(this); view->setHeaderHidden(true); setWidget(view); diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 3db0e90da..2cf5c58a0 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h @@ -95,7 +95,7 @@ public: if (row2.isEmpty()) return row1; - return row1 + "\n " + row2; + return QString(row1 + "\n " + row2); } return GameListItem::data(role); diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h index 4f526dc7e..248fadaf3 100644 --- a/src/yuzu/hotkeys.h +++ b/src/yuzu/hotkeys.h @@ -67,8 +67,6 @@ public: private: struct Hotkey { - Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {} - QKeySequence keyseq; QShortcut* shortcut = nullptr; Qt::ShortcutContext context = Qt::WindowShortcut; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index bdee44b04..a59abf6e8 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -8,6 +8,7 @@ #include <thread> // VFS includes must be before glad as they will conflict with Windows file api, which uses defines. +#include "applets/error.h" #include "applets/profile_select.h" #include "applets/software_keyboard.h" #include "applets/web_browser.h" @@ -15,6 +16,7 @@ #include "configuration/configure_per_general.h" #include "core/file_sys/vfs.h" #include "core/file_sys/vfs_real.h" +#include "core/frontend/applets/general_frontend.h" #include "core/frontend/scope_acquire_window_context.h" #include "core/hle/service/acc/profile_manager.h" #include "core/hle/service/am/applets/applets.h" @@ -196,11 +198,11 @@ GMainWindow::GMainWindow() ConnectMenuEvents(); ConnectWidgetEvents(); + LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc); + UpdateWindowTitle(); - setWindowTitle(QString("yuzu %1| %2-%3") - .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc)); show(); Core::System::GetInstance().SetContentProvider( @@ -795,9 +797,13 @@ bool GMainWindow::LoadROM(const QString& filename) { system.SetGPUDebugContext(debug_context); - system.SetProfileSelector(std::make_unique<QtProfileSelector>(*this)); - system.SetSoftwareKeyboard(std::make_unique<QtSoftwareKeyboard>(*this)); - system.SetWebBrowser(std::make_unique<QtWebBrowser>(*this)); + system.SetAppletFrontendSet({ + std::make_unique<QtErrorDisplay>(*this), + nullptr, + std::make_unique<QtProfileSelector>(*this), + std::make_unique<QtSoftwareKeyboard>(*this), + std::make_unique<QtWebBrowser>(*this), + }); const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())}; @@ -930,9 +936,7 @@ void GMainWindow::BootGame(const QString& filename) { title_name = FileUtil::GetFilename(filename.toStdString()); } - setWindowTitle(QString("yuzu %1| %4 | %2-%3") - .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc, - QString::fromStdString(title_name))); + UpdateWindowTitle(QString::fromStdString(title_name)); loading_screen->Prepare(Core::System::GetInstance().GetAppLoader()); loading_screen->show(); @@ -973,8 +977,8 @@ void GMainWindow::ShutdownGame() { loading_screen->Clear(); game_list->show(); game_list->setFilterFocus(); - setWindowTitle(QString("yuzu %1| %2-%3") - .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc)); + + UpdateWindowTitle(); // Disable status bar updates status_bar_update_timer.stop(); @@ -1583,6 +1587,11 @@ void GMainWindow::OnLoadComplete() { loading_screen->OnLoadComplete(); } +void GMainWindow::ErrorDisplayDisplayError(QString body) { + QMessageBox::critical(this, tr("Error Display"), body); + emit ErrorDisplayFinished(); +} + void GMainWindow::OnMenuReportCompatibility() { if (!Settings::values.yuzu_token.empty() && !Settings::values.yuzu_username.empty()) { CompatDB compatdb{this}; @@ -1756,6 +1765,19 @@ void GMainWindow::OnCaptureScreenshot() { OnStartGame(); } +void GMainWindow::UpdateWindowTitle(const QString& title_name) { + const QString full_name = QString::fromUtf8(Common::g_build_fullname); + const QString branch_name = QString::fromUtf8(Common::g_scm_branch); + const QString description = QString::fromUtf8(Common::g_scm_desc); + + if (title_name.isEmpty()) { + setWindowTitle(QStringLiteral("yuzu %1| %2-%3").arg(full_name, branch_name, description)); + } else { + setWindowTitle(QStringLiteral("yuzu %1| %4 | %2-%3") + .arg(full_name, branch_name, description, title_name)); + } +} + void GMainWindow::UpdateStatusBar() { if (emu_thread == nullptr) { status_bar_update_timer.stop(); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index ce5045819..7bf82e665 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -102,6 +102,8 @@ signals: // Signal that tells widgets to update icons to use the current theme void UpdateThemedIcons(); + void ErrorDisplayFinished(); + void ProfileSelectorFinishedSelection(std::optional<Service::Account::UUID> uuid); void SoftwareKeyboardFinishedText(std::optional<std::u16string> text); void SoftwareKeyboardFinishedCheckDialog(); @@ -111,6 +113,7 @@ signals: public slots: void OnLoadComplete(); + void ErrorDisplayDisplayError(QString body); void ProfileSelectorSelectProfile(); void SoftwareKeyboardGetText(const Core::Frontend::SoftwareKeyboardParameters& parameters); void SoftwareKeyboardInvokeCheckDialog(std::u16string error_message); @@ -206,6 +209,7 @@ private slots: private: std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); + void UpdateWindowTitle(const QString& title_name = {}); void UpdateStatusBar(); Ui::MainWindow ui; diff --git a/src/yuzu/util/spinbox.cpp b/src/yuzu/util/spinbox.cpp deleted file mode 100644 index 14ef1e884..000000000 --- a/src/yuzu/util/spinbox.cpp +++ /dev/null @@ -1,278 +0,0 @@ -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -// Copyright 2014 Tony Wasserka -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of the owner nor the names of its contributors may -// be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include <cstdlib> -#include <QLineEdit> -#include <QRegExpValidator> -#include "common/assert.h" -#include "yuzu/util/spinbox.h" - -CSpinBox::CSpinBox(QWidget* parent) - : QAbstractSpinBox(parent), min_value(-100), max_value(100), value(0), base(10), num_digits(0) { - // TODO: Might be nice to not immediately call the slot. - // Think of an address that is being replaced by a different one, in which case a lot - // invalid intermediate addresses would be read from during editing. - connect(lineEdit(), &QLineEdit::textEdited, this, &CSpinBox::OnEditingFinished); - - UpdateText(); -} - -void CSpinBox::SetValue(qint64 val) { - auto old_value = value; - value = std::max(std::min(val, max_value), min_value); - - if (old_value != value) { - UpdateText(); - emit ValueChanged(value); - } -} - -void CSpinBox::SetRange(qint64 min, qint64 max) { - min_value = min; - max_value = max; - - SetValue(value); - UpdateText(); -} - -void CSpinBox::stepBy(int steps) { - auto new_value = value; - // Scale number of steps by the currently selected digit - // TODO: Move this code elsewhere and enable it. - // TODO: Support for num_digits==0, too - // TODO: Support base!=16, too - // TODO: Make the cursor not jump back to the end of the line... - /*if (base == 16 && num_digits > 0) { - int digit = num_digits - (lineEdit()->cursorPosition() - prefix.length()) - 1; - digit = std::max(0, std::min(digit, num_digits - 1)); - steps <<= digit * 4; - }*/ - - // Increment "new_value" by "steps", and perform annoying overflow checks, too. - if (steps < 0 && new_value + steps > new_value) { - new_value = std::numeric_limits<qint64>::min(); - } else if (steps > 0 && new_value + steps < new_value) { - new_value = std::numeric_limits<qint64>::max(); - } else { - new_value += steps; - } - - SetValue(new_value); - UpdateText(); -} - -QAbstractSpinBox::StepEnabled CSpinBox::stepEnabled() const { - StepEnabled ret = StepNone; - - if (value > min_value) - ret |= StepDownEnabled; - - if (value < max_value) - ret |= StepUpEnabled; - - return ret; -} - -void CSpinBox::SetBase(int base) { - this->base = base; - - UpdateText(); -} - -void CSpinBox::SetNumDigits(int num_digits) { - this->num_digits = num_digits; - - UpdateText(); -} - -void CSpinBox::SetPrefix(const QString& prefix) { - this->prefix = prefix; - - UpdateText(); -} - -void CSpinBox::SetSuffix(const QString& suffix) { - this->suffix = suffix; - - UpdateText(); -} - -static QString StringToInputMask(const QString& input) { - QString mask = input; - - // ... replace any special characters by their escaped counterparts ... - mask.replace("\\", "\\\\"); - mask.replace("A", "\\A"); - mask.replace("a", "\\a"); - mask.replace("N", "\\N"); - mask.replace("n", "\\n"); - mask.replace("X", "\\X"); - mask.replace("x", "\\x"); - mask.replace("9", "\\9"); - mask.replace("0", "\\0"); - mask.replace("D", "\\D"); - mask.replace("d", "\\d"); - mask.replace("#", "\\#"); - mask.replace("H", "\\H"); - mask.replace("h", "\\h"); - mask.replace("B", "\\B"); - mask.replace("b", "\\b"); - mask.replace(">", "\\>"); - mask.replace("<", "\\<"); - mask.replace("!", "\\!"); - - return mask; -} - -void CSpinBox::UpdateText() { - // If a fixed number of digits is used, we put the line edit in insertion mode by setting an - // input mask. - QString mask; - if (num_digits != 0) { - mask += StringToInputMask(prefix); - - // For base 10 and negative range, demand a single sign character - if (HasSign()) - mask += "X"; // identified as "-" or "+" in the validator - - // Uppercase digits greater than 9. - mask += ">"; - - // Match num_digits digits - // Digits irrelevant to the chosen number base are filtered in the validator - mask += QString("H").repeated(std::max(num_digits, 1)); - - // Switch off case conversion - mask += "!"; - - mask += StringToInputMask(suffix); - } - lineEdit()->setInputMask(mask); - - // Set new text without changing the cursor position. This will cause the cursor to briefly - // appear at the end of the line and then to jump back to its original position. That's - // a bit ugly, but better than having setText() move the cursor permanently all the time. - int cursor_position = lineEdit()->cursorPosition(); - lineEdit()->setText(TextFromValue()); - lineEdit()->setCursorPosition(cursor_position); -} - -QString CSpinBox::TextFromValue() { - return prefix + QString(HasSign() ? ((value < 0) ? "-" : "+") : "") + - QString("%1").arg(std::abs(value), num_digits, base, QLatin1Char('0')).toUpper() + - suffix; -} - -qint64 CSpinBox::ValueFromText() { - unsigned strpos = prefix.length(); - - QString num_string = text().mid(strpos, text().length() - strpos - suffix.length()); - return num_string.toLongLong(nullptr, base); -} - -bool CSpinBox::HasSign() const { - return base == 10 && min_value < 0; -} - -void CSpinBox::OnEditingFinished() { - // Only update for valid input - QString input = lineEdit()->text(); - int pos = 0; - if (QValidator::Acceptable == validate(input, pos)) - SetValue(ValueFromText()); -} - -QValidator::State CSpinBox::validate(QString& input, int& pos) const { - if (!prefix.isEmpty() && input.left(prefix.length()) != prefix) - return QValidator::Invalid; - - int strpos = prefix.length(); - - // Empty "numbers" allowed as intermediate values - if (strpos >= input.length() - HasSign() - suffix.length()) - return QValidator::Intermediate; - - DEBUG_ASSERT(base <= 10 || base == 16); - QString regexp; - - // Demand sign character for negative ranges - if (HasSign()) - regexp += "[+\\-]"; - - // Match digits corresponding to the chosen number base. - regexp += QString("[0-%1").arg(std::min(base, 9)); - if (base == 16) { - regexp += "a-fA-F"; - } - regexp += "]"; - - // Specify number of digits - if (num_digits > 0) { - regexp += QString("{%1}").arg(num_digits); - } else { - regexp += "+"; - } - - // Match string - QRegExp num_regexp(regexp); - int num_pos = strpos; - QString sub_input = input.mid(strpos, input.length() - strpos - suffix.length()); - - if (!num_regexp.exactMatch(sub_input) && num_regexp.matchedLength() == 0) - return QValidator::Invalid; - - sub_input = sub_input.left(num_regexp.matchedLength()); - bool ok; - qint64 val = sub_input.toLongLong(&ok, base); - - if (!ok) - return QValidator::Invalid; - - // Outside boundaries => don't accept - if (val < min_value || val > max_value) - return QValidator::Invalid; - - // Make sure we are actually at the end of this string... - strpos += num_regexp.matchedLength(); - - if (!suffix.isEmpty() && input.mid(strpos) != suffix) { - return QValidator::Invalid; - } else { - strpos += suffix.length(); - } - - if (strpos != input.length()) - return QValidator::Invalid; - - // At this point we can say for sure that the input is fine. Let's fix it up a bit though - input.replace(num_pos, sub_input.length(), sub_input.toUpper()); - - return QValidator::Acceptable; -} diff --git a/src/yuzu/util/spinbox.h b/src/yuzu/util/spinbox.h deleted file mode 100644 index 2fa1db3a4..000000000 --- a/src/yuzu/util/spinbox.h +++ /dev/null @@ -1,86 +0,0 @@ -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -// Copyright 2014 Tony Wasserka -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of the owner nor the names of its contributors may -// be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include <QAbstractSpinBox> -#include <QtGlobal> - -class QVariant; - -/** - * A custom spin box widget with enhanced functionality over Qt's QSpinBox - */ -class CSpinBox : public QAbstractSpinBox { - Q_OBJECT - -public: - explicit CSpinBox(QWidget* parent = nullptr); - - void stepBy(int steps) override; - StepEnabled stepEnabled() const override; - - void SetValue(qint64 val); - - void SetRange(qint64 min, qint64 max); - - void SetBase(int base); - - void SetPrefix(const QString& prefix); - void SetSuffix(const QString& suffix); - - void SetNumDigits(int num_digits); - - QValidator::State validate(QString& input, int& pos) const override; - -signals: - void ValueChanged(qint64 val); - -private slots: - void OnEditingFinished(); - -private: - void UpdateText(); - - bool HasSign() const; - - QString TextFromValue(); - qint64 ValueFromText(); - - qint64 min_value, max_value; - - qint64 value; - - QString prefix, suffix; - - int base; - - int num_digits; -}; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index f24cc77fe..d0ae058fd 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -349,6 +349,8 @@ void Config::ReadValues() { Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); + Settings::values.use_compatibility_profile = + sdl2_config->GetBoolean("Renderer", "use_compatibility_profile", true); Settings::values.use_disk_shader_cache = sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); Settings::values.use_accurate_gpu_emulation = diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index 68a176032..8f104062d 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -176,9 +176,13 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { SDL_SetMainReady(); + const SDL_GLprofile profile = Settings::values.use_compatibility_profile + ? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY + : SDL_GL_CONTEXT_PROFILE_CORE; + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 7ea4a1b18..d3734927b 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -32,11 +32,7 @@ #include "yuzu_cmd/config.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" -#include <getopt.h> #include "core/file_sys/registered_cache.h" -#ifndef _MSC_VER -#include <unistd.h> -#endif #ifdef _WIN32 // windows.h needs to be included before shellapi.h @@ -45,6 +41,12 @@ #include <shellapi.h> #endif +#undef _UNICODE +#include <getopt.h> +#ifndef _MSC_VER +#include <unistd.h> +#endif + #ifdef _WIN32 extern "C" { // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable @@ -220,6 +222,7 @@ int main(int argc, char** argv) { system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); + emu_window->MakeCurrent(); system.Renderer().Rasterizer().LoadDiskResources(); while (emu_window->IsOpen()) { |