Diffstat (limited to 'src'): 208 files changed, 6146 insertions, 1795 deletions
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 8f2591d53..04bc3128f 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -120,7 +120,7 @@ private: duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin); entry.log_class = log_class; entry.log_level = log_level; - entry.filename = Common::TrimSourcePath(filename); + entry.filename = filename; entry.line_num = line_nr; entry.function = function; entry.message = std::move(message); diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index fca0267a1..fc338c70d 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -23,7 +23,7 @@ struct Entry { std::chrono::microseconds timestamp; Class log_class; Level log_level; - std::string filename; + const char* filename; unsigned int line_num; std::string function; std::string message; diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 259708116..13a4f1e30 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -9,6 +9,15 @@ namespace Log { +// trims up to and including the last of ../, ..\, src/, src\ in a string +constexpr const char* TrimSourcePath(std::string_view source) { + const auto rfind = [source](const std::string_view match) { + return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size()); + }; + auto idx = std::max({rfind("src/"), rfind("src\\"), rfind("../"), rfind("..\\")}); + return source.data() + idx; +} + /// Specifies the severity or level of detail of the log message. enum class Level : u8 { Trace, ///< Extremely detailed and repetitive debugging information that is likely to @@ -141,24 +150,24 @@ void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsig #ifdef _DEBUG #define LOG_TRACE(log_class, ...) \ - ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Trace, __FILE__, __LINE__, \ - __func__, __VA_ARGS__) + ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Trace, \ + ::Log::TrimSourcePath(__FILE__), __LINE__, __func__, __VA_ARGS__) #else #define LOG_TRACE(log_class, fmt, ...) (void(0)) #endif #define LOG_DEBUG(log_class, ...) \ - ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Debug, __FILE__, __LINE__, \ - __func__, __VA_ARGS__) + ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Debug, \ + ::Log::TrimSourcePath(__FILE__), __LINE__, __func__, __VA_ARGS__) #define LOG_INFO(log_class, ...) \ - ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Info, __FILE__, __LINE__, \ - __func__, __VA_ARGS__) + ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Info, \ + ::Log::TrimSourcePath(__FILE__), __LINE__, __func__, __VA_ARGS__) #define LOG_WARNING(log_class, ...) \ - ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Warning, __FILE__, __LINE__, \ - __func__, __VA_ARGS__) + ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Warning, \ + ::Log::TrimSourcePath(__FILE__), __LINE__, __func__, __VA_ARGS__) #define LOG_ERROR(log_class, ...) \ - ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Error, __FILE__, __LINE__, \ - __func__, __VA_ARGS__) + ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Error, \ + ::Log::TrimSourcePath(__FILE__), __LINE__, __func__, __VA_ARGS__) #define LOG_CRITICAL(log_class, ...) 
\ - ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Critical, __FILE__, __LINE__, \ - __func__, __VA_ARGS__) + ::Log::FmtLogMessage(::Log::Class::log_class, ::Log::Level::Critical, \ + ::Log::TrimSourcePath(__FILE__), __LINE__, __func__, __VA_ARGS__) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 959f278aa..84883a1d3 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -223,26 +223,4 @@ std::u16string UTF16StringFromFixedZeroTerminatedBuffer(std::u16string_view buff return std::u16string(buffer.begin(), buffer.begin() + len); } -const char* TrimSourcePath(const char* path, const char* root) { - const char* p = path; - - while (*p != '\0') { - const char* next_slash = p; - while (*next_slash != '\0' && *next_slash != '/' && *next_slash != '\\') { - ++next_slash; - } - - bool is_src = Common::ComparePartialString(p, next_slash, root); - p = next_slash; - - if (*p != '\0') { - ++p; - } - if (is_src) { - path = p; - } - } - return path; -} - } // namespace Common diff --git a/src/common/thread.h b/src/common/thread.h index 0cfd98be6..2fc071685 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -28,6 +28,15 @@ public: is_set = false; } + template <class Duration> + bool WaitFor(const std::chrono::duration<Duration>& time) { + std::unique_lock lk{mutex}; + if (!condvar.wait_for(lk, time, [this] { return is_set; })) + return false; + is_set = false; + return true; + } + template <class Clock, class Duration> bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) { std::unique_lock lk{mutex}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1a3647a67..26612e692 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -15,14 +15,14 @@ add_library(core STATIC constants.h core.cpp core.h - core_cpu.cpp - core_cpu.h + core_manager.cpp + core_manager.h core_timing.cpp core_timing.h core_timing_util.cpp core_timing_util.h - cpu_core_manager.cpp - cpu_core_manager.h + cpu_manager.cpp + cpu_manager.h crypto/aes_util.cpp crypto/aes_util.h crypto/encryption_layer.cpp @@ -158,6 +158,8 @@ add_library(core STATIC hle/kernel/mutex.h hle/kernel/object.cpp hle/kernel/object.h + hle/kernel/physical_core.cpp + hle/kernel/physical_core.h hle/kernel/process.cpp hle/kernel/process.h hle/kernel/process_capability.cpp @@ -179,14 +181,16 @@ add_library(core STATIC hle/kernel/svc.cpp hle/kernel/svc.h hle/kernel/svc_wrap.h + hle/kernel/synchronization_object.cpp + hle/kernel/synchronization_object.h + hle/kernel/synchronization.cpp + hle/kernel/synchronization.h hle/kernel/thread.cpp hle/kernel/thread.h hle/kernel/transfer_memory.cpp hle/kernel/transfer_memory.h hle/kernel/vm_manager.cpp hle/kernel/vm_manager.h - hle/kernel/wait_object.cpp - hle/kernel/wait_object.h hle/kernel/writable_event.cpp hle/kernel/writable_event.h hle/lock.cpp diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index e825c0526..29eaf74e5 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -10,11 +10,13 @@ #include "common/microprofile.h" #include "core/arm/dynarmic/arm_dynarmic.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" #include 
"core/hle/kernel/vm_manager.h" #include "core/memory.h" @@ -87,7 +89,7 @@ public: if (GDBStub::IsServerEnabled()) { parent.jit->HaltExecution(); parent.SetPC(pc); - Kernel::Thread* thread = Kernel::GetCurrentThread(); + Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); parent.SaveContext(thread->GetContext()); GDBStub::Break(); GDBStub::SendTrap(thread, 5); @@ -152,7 +154,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag config.tpidr_el0 = &cb->tpidr_el0; config.dczid_el0 = 4; config.ctr_el0 = 0x8444c004; - config.cntfrq_el0 = Timing::CNTFREQ; + config.cntfrq_el0 = Hardware::CNTFREQ; // Unpredictable instructions config.define_unpredictable_behaviour = true; diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index abd59ff4b..94570e520 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -2,10 +2,24 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif #include "core/arm/exclusive_monitor.h" +#include "core/memory.h" namespace Core { ExclusiveMonitor::~ExclusiveMonitor() = default; +std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, + std::size_t num_cores) { +#ifdef ARCHITECTURE_x86_64 + return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores); +#else + // TODO(merry): Passthrough exclusive monitor + return nullptr; +#endif +} + } // namespace Core diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index f59aca667..4ef418b90 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -4,8 +4,14 @@ #pragma once +#include <memory> + #include "common/common_types.h" +namespace Memory { +class Memory; +} + namespace Core { class ExclusiveMonitor { @@ -22,4 +28,7 @@ public: virtual bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) = 0; }; +std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, + std::size_t num_cores); + } // namespace Core diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 48182c99a..f99ad5802 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -9,6 +9,7 @@ #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" namespace Core { @@ -177,7 +178,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); } - Kernel::Thread* thread = Kernel::GetCurrentThread(); + Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); SaveContext(thread->GetContext()); if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { last_bkpt_hit = false; diff --git a/src/core/core.cpp b/src/core/core.cpp index d697b80ef..0eb0c0dca 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -11,9 +11,9 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" -#include "core/cpu_core_manager.h" +#include "core/cpu_manager.h" #include "core/file_sys/bis_factory.h" #include "core/file_sys/card_image.h" #include "core/file_sys/mode.h" @@ -28,6 +28,7 @@ #include 
"core/hardware_interrupt_manager.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" @@ -113,16 +114,25 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, struct System::Impl { explicit Impl(System& system) : kernel{system}, fs_controller{system}, memory{system}, - cpu_core_manager{system}, reporter{system}, applet_manager{system} {} + cpu_manager{system}, reporter{system}, applet_manager{system} {} - Cpu& CurrentCpuCore() { - return cpu_core_manager.GetCurrentCore(); + CoreManager& CurrentCoreManager() { + return cpu_manager.GetCurrentCoreManager(); + } + + Kernel::PhysicalCore& CurrentPhysicalCore() { + const auto index = cpu_manager.GetActiveCoreIndex(); + return kernel.PhysicalCore(index); + } + + Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) { + return kernel.PhysicalCore(index); } ResultStatus RunLoop(bool tight_loop) { status = ResultStatus::Success; - cpu_core_manager.RunLoop(tight_loop); + cpu_manager.RunLoop(tight_loop); return status; } @@ -131,8 +141,8 @@ struct System::Impl { LOG_DEBUG(HW_Memory, "initialized OK"); core_timing.Initialize(); - cpu_core_manager.Initialize(); kernel.Initialize(); + cpu_manager.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch()); @@ -205,7 +215,6 @@ struct System::Impl { // Main process has been loaded and been made current. // Begin GPU and CPU execution. gpu_core->Start(); - cpu_core_manager.StartThreads(); // Initialize cheat engine if (cheat_engine) { @@ -259,7 +268,9 @@ struct System::Impl { is_powered_on = false; exit_lock = false; - gpu_core->WaitIdle(); + if (gpu_core) { + gpu_core->WaitIdle(); + } // Shutdown emulation session renderer.reset(); @@ -272,7 +283,7 @@ struct System::Impl { gpu_core.reset(); // Close all CPU/threading state - cpu_core_manager.Shutdown(); + cpu_manager.Shutdown(); // Shutdown kernel and core timing kernel.Shutdown(); @@ -342,7 +353,7 @@ struct System::Impl { std::unique_ptr<Tegra::GPU> gpu_core; std::unique_ptr<Hardware::InterruptManager> interrupt_manager; Memory::Memory memory; - CpuCoreManager cpu_core_manager; + CpuManager cpu_manager; bool is_powered_on = false; bool exit_lock = false; @@ -377,12 +388,12 @@ struct System::Impl { System::System() : impl{std::make_unique<Impl>(*this)} {} System::~System() = default; -Cpu& System::CurrentCpuCore() { - return impl->CurrentCpuCore(); +CoreManager& System::CurrentCoreManager() { + return impl->CurrentCoreManager(); } -const Cpu& System::CurrentCpuCore() const { - return impl->CurrentCpuCore(); +const CoreManager& System::CurrentCoreManager() const { + return impl->CurrentCoreManager(); } System::ResultStatus System::RunLoop(bool tight_loop) { @@ -394,7 +405,7 @@ System::ResultStatus System::SingleStep() { } void System::InvalidateCpuInstructionCaches() { - impl->cpu_core_manager.InvalidateAllInstructionCaches(); + impl->kernel.InvalidateAllInstructionCaches(); } System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) { @@ -406,13 +417,11 @@ bool System::IsPoweredOn() const { } void System::PrepareReschedule() { - CurrentCpuCore().PrepareReschedule(); + impl->CurrentPhysicalCore().Stop(); } void System::PrepareReschedule(const u32 core_index) { - if (core_index < GlobalScheduler().CpuCoresCount()) { - 
CpuCore(core_index).PrepareReschedule(); - } + impl->kernel.PrepareReschedule(core_index); } PerfStatsResults System::GetAndResetPerfStats() { @@ -428,31 +437,31 @@ const TelemetrySession& System::TelemetrySession() const { } ARM_Interface& System::CurrentArmInterface() { - return CurrentCpuCore().ArmInterface(); + return impl->CurrentPhysicalCore().ArmInterface(); } const ARM_Interface& System::CurrentArmInterface() const { - return CurrentCpuCore().ArmInterface(); + return impl->CurrentPhysicalCore().ArmInterface(); } std::size_t System::CurrentCoreIndex() const { - return CurrentCpuCore().CoreIndex(); + return impl->cpu_manager.GetActiveCoreIndex(); } Kernel::Scheduler& System::CurrentScheduler() { - return CurrentCpuCore().Scheduler(); + return impl->CurrentPhysicalCore().Scheduler(); } const Kernel::Scheduler& System::CurrentScheduler() const { - return CurrentCpuCore().Scheduler(); + return impl->CurrentPhysicalCore().Scheduler(); } Kernel::Scheduler& System::Scheduler(std::size_t core_index) { - return CpuCore(core_index).Scheduler(); + return impl->GetPhysicalCore(core_index).Scheduler(); } const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { - return CpuCore(core_index).Scheduler(); + return impl->GetPhysicalCore(core_index).Scheduler(); } /// Gets the global scheduler @@ -474,28 +483,28 @@ const Kernel::Process* System::CurrentProcess() const { } ARM_Interface& System::ArmInterface(std::size_t core_index) { - return CpuCore(core_index).ArmInterface(); + return impl->GetPhysicalCore(core_index).ArmInterface(); } const ARM_Interface& System::ArmInterface(std::size_t core_index) const { - return CpuCore(core_index).ArmInterface(); + return impl->GetPhysicalCore(core_index).ArmInterface(); } -Cpu& System::CpuCore(std::size_t core_index) { - return impl->cpu_core_manager.GetCore(core_index); +CoreManager& System::GetCoreManager(std::size_t core_index) { + return impl->cpu_manager.GetCoreManager(core_index); } -const Cpu& System::CpuCore(std::size_t core_index) const { +const CoreManager& System::GetCoreManager(std::size_t core_index) const { ASSERT(core_index < NUM_CPU_CORES); - return impl->cpu_core_manager.GetCore(core_index); + return impl->cpu_manager.GetCoreManager(core_index); } ExclusiveMonitor& System::Monitor() { - return impl->cpu_core_manager.GetExclusiveMonitor(); + return impl->kernel.GetExclusiveMonitor(); } const ExclusiveMonitor& System::Monitor() const { - return impl->cpu_core_manager.GetExclusiveMonitor(); + return impl->kernel.GetExclusiveMonitor(); } Memory::Memory& System::Memory() { diff --git a/src/core/core.h b/src/core/core.h index e240c5c58..e69d68fcf 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -93,7 +93,7 @@ class Memory; namespace Core { class ARM_Interface; -class Cpu; +class CoreManager; class ExclusiveMonitor; class FrameLimiter; class PerfStats; @@ -218,10 +218,10 @@ public: const ARM_Interface& ArmInterface(std::size_t core_index) const; /// Gets a CPU interface to the CPU core with the specified index - Cpu& CpuCore(std::size_t core_index); + CoreManager& GetCoreManager(std::size_t core_index); /// Gets a CPU interface to the CPU core with the specified index - const Cpu& CpuCore(std::size_t core_index) const; + const CoreManager& GetCoreManager(std::size_t core_index) const; /// Gets a reference to the exclusive monitor ExclusiveMonitor& Monitor(); @@ -364,10 +364,10 @@ private: System(); /// Returns the currently running CPU core - Cpu& CurrentCpuCore(); + CoreManager& CurrentCoreManager(); /// Returns the currently 
running CPU core - const Cpu& CurrentCpuCore() const; + const CoreManager& CurrentCoreManager() const; /** * Initialize the emulated system. diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp deleted file mode 100644 index 630cd4feb..000000000 --- a/src/core/core_cpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <condition_variable> -#include <mutex> - -#include "common/logging/log.h" -#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" -#endif -#include "core/arm/exclusive_monitor.h" -#include "core/arm/unicorn/arm_unicorn.h" -#include "core/core.h" -#include "core/core_cpu.h" -#include "core/core_timing.h" -#include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/lock.h" -#include "core/settings.h" - -namespace Core { - -void CpuBarrier::NotifyEnd() { - std::unique_lock lock{mutex}; - end = true; - condition.notify_all(); -} - -bool CpuBarrier::Rendezvous() { - if (!Settings::values.use_multi_core) { - // Meaningless when running in single-core mode - return true; - } - - if (!end) { - std::unique_lock lock{mutex}; - - --cores_waiting; - if (!cores_waiting) { - cores_waiting = NUM_CPU_CORES; - condition.notify_all(); - return true; - } - - condition.wait(lock); - return true; - } - - return false; -} - -Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, - std::size_t core_index) - : cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()}, - core_timing{system.CoreTiming()}, core_index{core_index} { -#ifdef ARCHITECTURE_x86_64 - arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); -#else - arm_interface = std::make_unique<ARM_Unicorn>(system); - LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); -#endif - - scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); -} - -Cpu::~Cpu() = default; - -std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor( - [[maybe_unused]] Memory::Memory& memory, [[maybe_unused]] std::size_t num_cores) { -#ifdef ARCHITECTURE_x86_64 - return std::make_unique<DynarmicExclusiveMonitor>(memory, num_cores); -#else - // TODO(merry): Passthrough exclusive monitor - return nullptr; -#endif -} - -void Cpu::RunLoop(bool tight_loop) { - // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step - if (!cpu_barrier.Rendezvous()) { - // If rendezvous failed, session has been killed - return; - } - - Reschedule(); - - // If we don't have a currently active thread then don't execute instructions, - // instead advance to the next event and try to yield to the next thread - if (Kernel::GetCurrentThread() == nullptr) { - LOG_TRACE(Core, "Core-{} idling", core_index); - core_timing.Idle(); - } else { - if (tight_loop) { - arm_interface->Run(); - } else { - arm_interface->Step(); - } - // We are stopping a run, exclusive state must be cleared - arm_interface->ClearExclusiveState(); - } - core_timing.Advance(); - - Reschedule(); -} - -void Cpu::SingleStep() { - return RunLoop(false); -} - -void Cpu::PrepareReschedule() { - arm_interface->PrepareReschedule(); -} - -void Cpu::Reschedule() { - // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock(HLE::g_hle_lock); - - global_scheduler.SelectThread(core_index); - scheduler->TryDoContextSwitch(); -} - -void Cpu::Shutdown() { - 
scheduler->Shutdown(); -} - -} // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h deleted file mode 100644 index 78f5021a2..000000000 --- a/src/core/core_cpu.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <atomic> -#include <condition_variable> -#include <cstddef> -#include <memory> -#include <mutex> -#include "common/common_types.h" - -namespace Kernel { -class GlobalScheduler; -class Scheduler; -} // namespace Kernel - -namespace Core { -class System; -} - -namespace Core::Timing { -class CoreTiming; -} - -namespace Memory { -class Memory; -} - -namespace Core { - -class ARM_Interface; -class ExclusiveMonitor; - -constexpr unsigned NUM_CPU_CORES{4}; - -class CpuBarrier { -public: - bool IsAlive() const { - return !end; - } - - void NotifyEnd(); - - bool Rendezvous(); - -private: - unsigned cores_waiting{NUM_CPU_CORES}; - std::mutex mutex; - std::condition_variable condition; - std::atomic<bool> end{}; -}; - -class Cpu { -public: - Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, - std::size_t core_index); - ~Cpu(); - - void RunLoop(bool tight_loop = true); - - void SingleStep(); - - void PrepareReschedule(); - - ARM_Interface& ArmInterface() { - return *arm_interface; - } - - const ARM_Interface& ArmInterface() const { - return *arm_interface; - } - - Kernel::Scheduler& Scheduler() { - return *scheduler; - } - - const Kernel::Scheduler& Scheduler() const { - return *scheduler; - } - - bool IsMainCore() const { - return core_index == 0; - } - - std::size_t CoreIndex() const { - return core_index; - } - - void Shutdown(); - - /** - * Creates an exclusive monitor to handle exclusive reads/writes. - * - * @param memory The current memory subsystem that the monitor may wish - * to keep track of. - * - * @param num_cores The number of cores to assume about the CPU. - * - * @returns The constructed exclusive monitor instance, or nullptr if the current - * CPU backend is unable to use an exclusive monitor. - */ - static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, - std::size_t num_cores); - -private: - void Reschedule(); - - std::unique_ptr<ARM_Interface> arm_interface; - CpuBarrier& cpu_barrier; - Kernel::GlobalScheduler& global_scheduler; - std::unique_ptr<Kernel::Scheduler> scheduler; - Timing::CoreTiming& core_timing; - - std::atomic<bool> reschedule_pending = false; - std::size_t core_index; -}; - -} // namespace Core diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp new file mode 100644 index 000000000..8eacf92dd --- /dev/null +++ b/src/core/core_manager.cpp @@ -0,0 +1,70 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <condition_variable> +#include <mutex> + +#include "common/logging/log.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/lock.h" +#include "core/settings.h" + +namespace Core { + +CoreManager::CoreManager(System& system, std::size_t core_index) + : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore( + core_index)}, + core_timing{system.CoreTiming()}, core_index{core_index} {} + +CoreManager::~CoreManager() = default; + +void CoreManager::RunLoop(bool tight_loop) { + Reschedule(); + + // If we don't have a currently active thread then don't execute instructions, + // instead advance to the next event and try to yield to the next thread + if (Kernel::GetCurrentThread() == nullptr) { + LOG_TRACE(Core, "Core-{} idling", core_index); + core_timing.Idle(); + } else { + if (tight_loop) { + physical_core.Run(); + } else { + physical_core.Step(); + } + } + core_timing.Advance(); + + Reschedule(); +} + +void CoreManager::SingleStep() { + return RunLoop(false); +} + +void CoreManager::PrepareReschedule() { + physical_core.Stop(); +} + +void CoreManager::Reschedule() { + // Lock the global kernel mutex when we manipulate the HLE state + std::lock_guard lock(HLE::g_hle_lock); + + global_scheduler.SelectThread(core_index); + + physical_core.Scheduler().TryDoContextSwitch(); +} + +} // namespace Core diff --git a/src/core/core_manager.h b/src/core/core_manager.h new file mode 100644 index 000000000..b14e723d7 --- /dev/null +++ b/src/core/core_manager.h @@ -0,0 +1,63 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <atomic> +#include <cstddef> +#include <memory> +#include "common/common_types.h" + +namespace Kernel { +class GlobalScheduler; +class PhysicalCore; +} // namespace Kernel + +namespace Core { +class System; +} + +namespace Core::Timing { +class CoreTiming; +} + +namespace Memory { +class Memory; +} + +namespace Core { + +constexpr unsigned NUM_CPU_CORES{4}; + +class CoreManager { +public: + CoreManager(System& system, std::size_t core_index); + ~CoreManager(); + + void RunLoop(bool tight_loop = true); + + void SingleStep(); + + void PrepareReschedule(); + + bool IsMainCore() const { + return core_index == 0; + } + + std::size_t CoreIndex() const { + return core_index; + } + +private: + void Reschedule(); + + Kernel::GlobalScheduler& global_scheduler; + Kernel::PhysicalCore& physical_core; + Timing::CoreTiming& core_timing; + + std::atomic<bool> reschedule_pending = false; + std::size_t core_index; +}; + +} // namespace Core diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index aa09fa453..46d4178c4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/thread.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" namespace Core::Timing { @@ -215,7 +216,7 @@ void CoreTiming::Idle() { } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; + return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE}; } s64 CoreTiming::GetDowncount() const { diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index a10472a95..de50d3b14 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -11,7 +11,7 @@ namespace Core::Timing { -constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE; +constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE; s64 msToCycles(std::chrono::milliseconds ms) { if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) { @@ -20,9 +20,9 @@ s64 msToCycles(std::chrono::milliseconds ms) { } if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) { LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return BASE_CLOCK_RATE * (ms.count() / 1000); + return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000); } - return (BASE_CLOCK_RATE * ms.count()) / 1000; + return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000; } s64 usToCycles(std::chrono::microseconds us) { @@ -32,9 +32,9 @@ s64 usToCycles(std::chrono::microseconds us) { } if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) { LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return BASE_CLOCK_RATE * (us.count() / 1000000); + return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000); } - return (BASE_CLOCK_RATE * us.count()) / 1000000; + return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000; } s64 nsToCycles(std::chrono::nanoseconds ns) { @@ -44,14 +44,14 @@ s64 nsToCycles(std::chrono::nanoseconds ns) { } if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) { LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return BASE_CLOCK_RATE * (ns.count() / 1000000000); + return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000); } - return (BASE_CLOCK_RATE * ns.count()) / 1000000000; + return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; } u64 CpuCyclesToClockCycles(u64 ticks) { - const u128 temporal = 
Common::Multiply64Into128(ticks, CNTFREQ); - return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first; + const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ); + return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; } } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index cdd84d70f..addc72b19 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -6,28 +6,24 @@ #include <chrono> #include "common/common_types.h" +#include "core/hardware_properties.h" namespace Core::Timing { -// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz -// The exact value used is of course unverified. -constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked -constexpr u64 CNTFREQ = 19200000; // Value from fusee. - s64 msToCycles(std::chrono::milliseconds ms); s64 usToCycles(std::chrono::microseconds us); s64 nsToCycles(std::chrono::nanoseconds ns); inline std::chrono::milliseconds CyclesToMs(s64 cycles) { - return std::chrono::milliseconds(cycles * 1000 / BASE_CLOCK_RATE); + return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE); } inline std::chrono::nanoseconds CyclesToNs(s64 cycles) { - return std::chrono::nanoseconds(cycles * 1000000000 / BASE_CLOCK_RATE); + return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE); } inline std::chrono::microseconds CyclesToUs(s64 cycles) { - return std::chrono::microseconds(cycles * 1000000 / BASE_CLOCK_RATE); + return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE); } u64 CpuCyclesToClockCycles(u64 ticks); diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp deleted file mode 100644 index f04a34133..000000000 --- a/src/core/cpu_core_manager.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "core/arm/exclusive_monitor.h" -#include "core/core.h" -#include "core/core_cpu.h" -#include "core/core_timing.h" -#include "core/cpu_core_manager.h" -#include "core/gdbstub/gdbstub.h" -#include "core/settings.h" - -namespace Core { -namespace { -void RunCpuCore(const System& system, Cpu& cpu_state) { - while (system.IsPoweredOn()) { - cpu_state.RunLoop(true); - } -} -} // Anonymous namespace - -CpuCoreManager::CpuCoreManager(System& system) : system{system} {} -CpuCoreManager::~CpuCoreManager() = default; - -void CpuCoreManager::Initialize() { - barrier = std::make_unique<CpuBarrier>(); - exclusive_monitor = Cpu::MakeExclusiveMonitor(system.Memory(), cores.size()); - - for (std::size_t index = 0; index < cores.size(); ++index) { - cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); - } -} - -void CpuCoreManager::StartThreads() { - // Create threads for CPU cores 1-3, and build thread_to_cpu map - // CPU core 0 is run on the main thread - thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); - if (!Settings::values.use_multi_core) { - return; - } - - for (std::size_t index = 0; index < core_threads.size(); ++index) { - core_threads[index] = std::make_unique<std::thread>(RunCpuCore, std::cref(system), - std::ref(*cores[index + 1])); - thread_to_cpu[core_threads[index]->get_id()] = cores[index + 1].get(); - } -} - -void CpuCoreManager::Shutdown() { - barrier->NotifyEnd(); - if (Settings::values.use_multi_core) { - for (auto& thread : core_threads) { - thread->join(); - thread.reset(); - } - } - - thread_to_cpu.clear(); - for (auto& cpu_core : cores) { - cpu_core->Shutdown(); - cpu_core.reset(); - } - - exclusive_monitor.reset(); - barrier.reset(); -} - -Cpu& CpuCoreManager::GetCore(std::size_t index) { - return *cores.at(index); -} - -const Cpu& CpuCoreManager::GetCore(std::size_t index) const { - return *cores.at(index); -} - -ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() { - return *exclusive_monitor; -} - -const ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() const { - return *exclusive_monitor; -} - -Cpu& CpuCoreManager::GetCurrentCore() { - if (Settings::values.use_multi_core) { - const auto& search = thread_to_cpu.find(std::this_thread::get_id()); - ASSERT(search != thread_to_cpu.end()); - ASSERT(search->second); - return *search->second; - } - - // Otherwise, use single-threaded mode active_core variable - return *cores[active_core]; -} - -const Cpu& CpuCoreManager::GetCurrentCore() const { - if (Settings::values.use_multi_core) { - const auto& search = thread_to_cpu.find(std::this_thread::get_id()); - ASSERT(search != thread_to_cpu.end()); - ASSERT(search->second); - return *search->second; - } - - // Otherwise, use single-threaded mode active_core variable - return *cores[active_core]; -} - -void CpuCoreManager::RunLoop(bool tight_loop) { - // Update thread_to_cpu in case Core 0 is run from a different host thread - thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); - - if (GDBStub::IsServerEnabled()) { - GDBStub::HandlePacket(); - - // If the loop is halted and we want to step, use a tiny (1) number of instructions to - // execute. Otherwise, get out of the loop function. 
- if (GDBStub::GetCpuHaltFlag()) { - if (GDBStub::GetCpuStepFlag()) { - tight_loop = false; - } else { - return; - } - } - } - - auto& core_timing = system.CoreTiming(); - core_timing.ResetRun(); - bool keep_running{}; - do { - keep_running = false; - for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - cores[active_core]->RunLoop(tight_loop); - } - keep_running |= core_timing.CanCurrentContextRun(); - } - } while (keep_running); - - if (GDBStub::IsServerEnabled()) { - GDBStub::SetCpuStepFlag(false); - } -} - -void CpuCoreManager::InvalidateAllInstructionCaches() { - for (auto& cpu : cores) { - cpu->ArmInterface().ClearInstructionCache(); - } -} - -} // namespace Core diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h deleted file mode 100644 index 2cbbf8216..000000000 --- a/src/core/cpu_core_manager.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <map> -#include <memory> -#include <thread> - -namespace Core { - -class Cpu; -class CpuBarrier; -class ExclusiveMonitor; -class System; - -class CpuCoreManager { -public: - explicit CpuCoreManager(System& system); - CpuCoreManager(const CpuCoreManager&) = delete; - CpuCoreManager(CpuCoreManager&&) = delete; - - ~CpuCoreManager(); - - CpuCoreManager& operator=(const CpuCoreManager&) = delete; - CpuCoreManager& operator=(CpuCoreManager&&) = delete; - - void Initialize(); - void StartThreads(); - void Shutdown(); - - Cpu& GetCore(std::size_t index); - const Cpu& GetCore(std::size_t index) const; - - Cpu& GetCurrentCore(); - const Cpu& GetCurrentCore() const; - - ExclusiveMonitor& GetExclusiveMonitor(); - const ExclusiveMonitor& GetExclusiveMonitor() const; - - void RunLoop(bool tight_loop); - - void InvalidateAllInstructionCaches(); - -private: - static constexpr std::size_t NUM_CPU_CORES = 4; - - std::unique_ptr<ExclusiveMonitor> exclusive_monitor; - std::unique_ptr<CpuBarrier> barrier; - std::array<std::unique_ptr<Cpu>, NUM_CPU_CORES> cores; - std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> core_threads; - std::size_t active_core{}; ///< Active core, only used in single thread mode - - /// Map of guest threads to CPU cores - std::map<std::thread::id, Cpu*> thread_to_cpu; - - System& system; -}; - -} // namespace Core diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp new file mode 100644 index 000000000..70ddbdcca --- /dev/null +++ b/src/core/cpu_manager.cpp @@ -0,0 +1,81 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "core/arm/exclusive_monitor.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/cpu_manager.h" +#include "core/gdbstub/gdbstub.h" + +namespace Core { + +CpuManager::CpuManager(System& system) : system{system} {} +CpuManager::~CpuManager() = default; + +void CpuManager::Initialize() { + for (std::size_t index = 0; index < core_managers.size(); ++index) { + core_managers[index] = std::make_unique<CoreManager>(system, index); + } +} + +void CpuManager::Shutdown() { + for (auto& cpu_core : core_managers) { + cpu_core.reset(); + } +} + +CoreManager& CpuManager::GetCoreManager(std::size_t index) { + return *core_managers.at(index); +} + +const CoreManager& CpuManager::GetCoreManager(std::size_t index) const { + return *core_managers.at(index); +} + +CoreManager& CpuManager::GetCurrentCoreManager() { + // Otherwise, use single-threaded mode active_core variable + return *core_managers[active_core]; +} + +const CoreManager& CpuManager::GetCurrentCoreManager() const { + // Otherwise, use single-threaded mode active_core variable + return *core_managers[active_core]; +} + +void CpuManager::RunLoop(bool tight_loop) { + if (GDBStub::IsServerEnabled()) { + GDBStub::HandlePacket(); + + // If the loop is halted and we want to step, use a tiny (1) number of instructions to + // execute. Otherwise, get out of the loop function. + if (GDBStub::GetCpuHaltFlag()) { + if (GDBStub::GetCpuStepFlag()) { + tight_loop = false; + } else { + return; + } + } + } + + auto& core_timing = system.CoreTiming(); + core_timing.ResetRun(); + bool keep_running{}; + do { + keep_running = false; + for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { + core_timing.SwitchContext(active_core); + if (core_timing.CanCurrentContextRun()) { + core_managers[active_core]->RunLoop(tight_loop); + } + keep_running |= core_timing.CanCurrentContextRun(); + } + } while (keep_running); + + if (GDBStub::IsServerEnabled()) { + GDBStub::SetCpuStepFlag(false); + } +} + +} // namespace Core diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h new file mode 100644 index 000000000..97554d1bb --- /dev/null +++ b/src/core/cpu_manager.h @@ -0,0 +1,49 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <memory> +#include "core/hardware_properties.h" + +namespace Core { + +class CoreManager; +class System; + +class CpuManager { +public: + explicit CpuManager(System& system); + CpuManager(const CpuManager&) = delete; + CpuManager(CpuManager&&) = delete; + + ~CpuManager(); + + CpuManager& operator=(const CpuManager&) = delete; + CpuManager& operator=(CpuManager&&) = delete; + + void Initialize(); + void Shutdown(); + + CoreManager& GetCoreManager(std::size_t index); + const CoreManager& GetCoreManager(std::size_t index) const; + + CoreManager& GetCurrentCoreManager(); + const CoreManager& GetCurrentCoreManager() const; + + std::size_t GetActiveCoreIndex() const { + return active_core; + } + + void RunLoop(bool tight_loop); + +private: + std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers; + std::size_t active_core{}; ///< Active core, only used in single thread mode + + System& system; +}; + +} // namespace Core diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 4a9912641..3376eedc5 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -75,6 +75,13 @@ public: return nullptr; } + /// Returns if window is shown (not minimized) + virtual bool IsShown() const = 0; + + /// Retrieves Vulkan specific handlers from the window + virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const = 0; + /** * Signal that a touch pressed event has occurred (e.g. mouse click pressed) * @param framebuffer_x Framebuffer x-coordinate that was pressed diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index d6d2cf3f0..2dc795d56 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { // so just calculate them both even if the other isn't showing. 
FramebufferLayout res{width, height}; - const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / - ScreenUndocked::Width}; - const auto window_aspect_ratio = static_cast<float>(height) / width; + const float window_aspect_ratio = static_cast<float>(height) / width; + const float emulation_aspect_ratio = EmulationAspectRatio( + static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio); const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); @@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { return DefaultFrameLayout(width, height); } +float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) { + switch (aspect) { + case AspectRatio::Default: + return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; + case AspectRatio::R4_3: + return 3.0f / 4.0f; + case AspectRatio::R21_9: + return 9.0f / 21.0f; + case AspectRatio::StretchToWindow: + return window_aspect_ratio; + default: + return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; + } +} + } // namespace Layout diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index d2370adde..1d39c1faf 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -18,6 +18,13 @@ enum ScreenDocked : u32 { HeightDocked = 1080, }; +enum class AspectRatio { + Default, + R4_3, + R21_9, + StretchToWindow, +}; + /// Describes the layout of the window framebuffer struct FramebufferLayout { u32 width{ScreenUndocked::Width}; @@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); */ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); +/** + * Convenience method to determine emulation aspect ratio + * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio + * @param window_aspect_ratio Current window aspect ratio + * @return Emulation render window aspect ratio + */ +float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio); + } // namespace Layout diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h index 7c11d7546..2b098b7c6 100644 --- a/src/core/frontend/input.h +++ b/src/core/frontend/input.h @@ -15,6 +15,13 @@ namespace Input { +enum class AnalogDirection : u8 { + RIGHT, + LEFT, + UP, + DOWN, +}; + /// An abstract class template for an input device (a button, an analog input, etc.). template <typename StatusType> class InputDevice { @@ -23,6 +30,9 @@ public: virtual StatusType GetStatus() const { return {}; } + virtual bool GetAnalogDirectionStatus(AnalogDirection direction) const { + return {}; + } }; /// An abstract class template for a factory that can create input devices. 
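The frontend changes above add a user-selectable aspect ratio: framebuffer_layout.h gains the Layout::AspectRatio enum, and DefaultFrameLayout() now derives its emulation ratio from the new EmulationAspectRatio() helper instead of hard-coding the undocked screen shape. As a rough, self-contained sketch of how that helper behaves (the enum values and ratios are copied from the diff; the main() harness, the sample window size, and the inlined 1280x720 undocked-screen numbers are illustrative only), consider:

// Standalone mirror of the new Layout::EmulationAspectRatio() logic, for illustration only.
#include <cstdio>

enum class AspectRatio { Default, R4_3, R21_9, StretchToWindow };

// Returns a height/width ratio, matching the convention used by DefaultFrameLayout().
float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) {
    switch (aspect) {
    case AspectRatio::Default:
        return 720.0f / 1280.0f; // handheld (undocked) screen shape
    case AspectRatio::R4_3:
        return 3.0f / 4.0f;
    case AspectRatio::R21_9:
        return 9.0f / 21.0f;
    case AspectRatio::StretchToWindow:
        return window_aspect_ratio; // follow whatever shape the host window has
    default:
        return 720.0f / 1280.0f;
    }
}

int main() {
    const float window_ratio = 1080.0f / 2560.0f; // e.g. a 2560x1080 ultrawide window
    std::printf("Default         -> %.4f\n", EmulationAspectRatio(AspectRatio::Default, window_ratio));
    std::printf("21:9            -> %.4f\n", EmulationAspectRatio(AspectRatio::R21_9, window_ratio));
    std::printf("StretchToWindow -> %.4f\n", EmulationAspectRatio(AspectRatio::StretchToWindow, window_ratio));
    return 0;
}

Only StretchToWindow depends on the window's own ratio; every other option yields a fixed height/width value, which DefaultFrameLayout() then fits into the window area via MaxRectangle().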
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 37cb28848..67e95999d 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -35,7 +35,7 @@ #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h new file mode 100644 index 000000000..213461b6a --- /dev/null +++ b/src/core/hardware_properties.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <tuple> + +#include "common/common_types.h" + +namespace Core { + +namespace Hardware { + +// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz +// The exact value used is of course unverified. +constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked +constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed +constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores + +} // namespace Hardware + +struct EmuThreadHandle { + u32 host_handle; + u32 guest_handle; + + u64 GetRaw() const { + return (static_cast<u64>(host_handle) << 32) | guest_handle; + } + + bool operator==(const EmuThreadHandle& rhs) const { + return std::tie(host_handle, guest_handle) == std::tie(rhs.host_handle, rhs.guest_handle); + } + + bool operator!=(const EmuThreadHandle& rhs) const { + return !operator==(rhs); + } + + static constexpr EmuThreadHandle InvalidHandle() { + constexpr u32 invalid_handle = 0xFFFFFFFF; + return {invalid_handle, invalid_handle}; + } +}; + +} // namespace Core diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index db189c8e3..8475b698c 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/scheduler.h" @@ -202,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread->GetPriority() >= thread->GetPriority()) { - thread_list.insert(it, thread); - return; - } - ++it; + + const auto iter = + std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { + return entry->GetPriority() >= thread->GetPriority(); + }); + + if (iter == thread_list.cend()) { + thread_list.push_back(std::move(thread)); + } else { + thread_list.insert(iter, std::move(thread)); } - thread_list.push_back(std::move(thread)); } void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if 
(current_thread.get() == thread.get()) { - thread_list.erase(it); - return; - } - ++it; - } - UNREACHABLE(); + + const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), + [&thread](const auto& entry) { return thread == entry; }); + + ASSERT(iter != thread_list.cend()); + + thread_list.erase(iter); } -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { - std::vector<std::shared_ptr<Thread>> result; - std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - std::shared_ptr<Thread> current_thread = *it; - result.push_back(std::move(current_thread)); - ++it; +std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( + VAddr address) const { + const auto iter = arb_threads.find(address); + if (iter == arb_threads.cend()) { + return {}; } - return result; + + const std::list<std::shared_ptr<Thread>>& thread_list = iter->second; + return {thread_list.cbegin(), thread_list.cend()}; } } // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 386983e54..f958eee5a 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -86,7 +86,7 @@ private: void RemoveThread(std::shared_ptr<Thread> thread); // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); + std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; /// List of threads waiting for a address arbiter std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp index 4669a14ad..6d66276bc 100644 --- a/src/core/hle/kernel/client_session.cpp +++ b/src/core/hle/kernel/client_session.cpp @@ -12,7 +12,7 @@ namespace Kernel { -ClientSession::ClientSession(KernelCore& kernel) : WaitObject{kernel} {} +ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {} ClientSession::~ClientSession() { // This destructor will be called automatically when the last ClientSession handle is closed by @@ -31,6 +31,11 @@ void ClientSession::Acquire(Thread* thread) { UNIMPLEMENTED(); } +bool ClientSession::IsSignaled() const { + UNIMPLEMENTED(); + return true; +} + ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kernel, std::shared_ptr<Session> parent, std::string name) { diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h index b4289a9a8..d15b09554 100644 --- a/src/core/hle/kernel/client_session.h +++ b/src/core/hle/kernel/client_session.h @@ -7,7 +7,7 @@ #include <memory> #include <string> -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" union ResultCode; @@ -22,7 +22,7 @@ class KernelCore; class Session; class Thread; -class ClientSession final : public WaitObject { +class ClientSession final : public SynchronizationObject { public: explicit ClientSession(KernelCore& kernel); ~ClientSession() override; @@ -48,6 +48,8 @@ public: void Acquire(Thread* thread) override; + bool IsSignaled() const override; + private: static ResultVal<std::shared_ptr<ClientSession>> Create(KernelCore& kernel, std::shared_ptr<Session> parent, diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 2db28dcf0..c558a2f33 100644 --- 
a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -47,15 +47,15 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( const std::string& reason, u64 timeout, WakeupCallback&& callback, std::shared_ptr<WritableEvent> writable_event) { // Put the client thread to sleep until the wait event is signaled or the timeout expires. - thread->SetWakeupCallback([context = *this, callback](ThreadWakeupReason reason, - std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, - std::size_t index) mutable -> bool { - ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent); - callback(thread, context, reason); - context.WriteToOutgoingCommandBuffer(*thread); - return true; - }); + thread->SetWakeupCallback( + [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread, + std::shared_ptr<SynchronizationObject> object, + std::size_t index) mutable -> bool { + ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent); + callback(thread, context, reason); + context.WriteToOutgoingCommandBuffer(*thread); + return true; + }); auto& kernel = Core::System::GetInstance().Kernel(); if (!writable_event) { @@ -67,7 +67,7 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( const auto readable_event{writable_event->GetReadableEvent()}; writable_event->Clear(); thread->SetStatus(ThreadStatus::WaitHLEEvent); - thread->SetWaitObjects({readable_event}); + thread->SetSynchronizationObjects({readable_event}); readable_event->AddWaitingThread(thread); if (timeout > 0) { @@ -284,13 +284,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) { std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const { std::vector<u8> buffer; - const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()}; + const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && + BufferDescriptorA()[buffer_index].Size()}; auto& memory = Core::System::GetInstance().Memory(); if (is_buffer_a) { + ASSERT_MSG(BufferDescriptorA().size() > buffer_index, + "BufferDescriptorA invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorA()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size()); } else { + ASSERT_MSG(BufferDescriptorX().size() > buffer_index, + "BufferDescriptorX invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorX()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size()); } @@ -305,7 +310,8 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, return 0; } - const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()}; + const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && + BufferDescriptorB()[buffer_index].Size()}; const std::size_t buffer_size{GetWriteBufferSize(buffer_index)}; if (size > buffer_size) { LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size, @@ -315,8 +321,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, auto& memory = Core::System::GetInstance().Memory(); if (is_buffer_b) { + ASSERT_MSG(BufferDescriptorB().size() > buffer_index, + "BufferDescriptorB invalid buffer_index {}", buffer_index); + ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size, + "BufferDescriptorB buffer_index {} is not large enough", buffer_index); 
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size); } else { + ASSERT_MSG(BufferDescriptorC().size() > buffer_index, + "BufferDescriptorC invalid buffer_index {}", buffer_index); + ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, + "BufferDescriptorC buffer_index {} is not large enough", buffer_index); memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size); } @@ -324,15 +338,35 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, } std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const { - const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()}; - return is_buffer_a ? BufferDescriptorA()[buffer_index].Size() - : BufferDescriptorX()[buffer_index].Size(); + const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && + BufferDescriptorA()[buffer_index].Size()}; + if (is_buffer_a) { + ASSERT_MSG(BufferDescriptorA().size() > buffer_index, + "BufferDescriptorA invalid buffer_index {}", buffer_index); + ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0, + "BufferDescriptorA buffer_index {} is empty", buffer_index); + return BufferDescriptorA()[buffer_index].Size(); + } else { + ASSERT_MSG(BufferDescriptorX().size() > buffer_index, + "BufferDescriptorX invalid buffer_index {}", buffer_index); + ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0, + "BufferDescriptorX buffer_index {} is empty", buffer_index); + return BufferDescriptorX()[buffer_index].Size(); + } } std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const { - const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()}; - return is_buffer_b ? BufferDescriptorB()[buffer_index].Size() - : BufferDescriptorC()[buffer_index].Size(); + const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && + BufferDescriptorB()[buffer_index].Size()}; + if (is_buffer_b) { + ASSERT_MSG(BufferDescriptorB().size() > buffer_index, + "BufferDescriptorB invalid buffer_index {}", buffer_index); + return BufferDescriptorB()[buffer_index].Size(); + } else { + ASSERT_MSG(BufferDescriptorC().size() > buffer_index, + "BufferDescriptorC invalid buffer_index {}", buffer_index); + return BufferDescriptorC()[buffer_index].Size(); + } } std::string HLERequestContext::Description() const { diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1d0783bd3..4eb1d8703 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -3,13 +3,15 @@ // Refer to the license.txt file included. 
#include <atomic> +#include <functional> #include <memory> #include <mutex> #include <utility> #include "common/assert.h" #include "common/logging/log.h" - +#include "core/arm/arm_interface.h" +#include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" @@ -17,9 +19,11 @@ #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/lock.h" #include "core/hle/result.h" @@ -51,10 +55,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ if (thread->GetStatus() == ThreadStatus::WaitSynch || thread->GetStatus() == ThreadStatus::WaitHLEEvent) { // Remove the thread from each of its waiting objects' waitlists - for (const auto& object : thread->GetWaitObjects()) { + for (const auto& object : thread->GetSynchronizationObjects()) { object->RemoveWaitingThread(thread); } - thread->ClearWaitObjects(); + thread->ClearSynchronizationObjects(); // Invoke the wakeup callback before clearing the wait objects if (thread->HasWakeupCallback()) { @@ -93,11 +97,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } struct KernelCore::Impl { - explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {} + explicit Impl(Core::System& system) + : system{system}, global_scheduler{system}, synchronization{system} {} void Initialize(KernelCore& kernel) { Shutdown(); + InitializePhysicalCores(); InitializeSystemResourceLimit(kernel); InitializeThreads(); InitializePreemption(); @@ -121,6 +127,21 @@ struct KernelCore::Impl { global_scheduler.Shutdown(); named_ports.clear(); + + for (auto& core : cores) { + core.Shutdown(); + } + cores.clear(); + + exclusive_monitor.reset(); + } + + void InitializePhysicalCores() { + exclusive_monitor = + Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount()); + for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) { + cores.emplace_back(system, i, *exclusive_monitor); + } } // Creates the default system resource limit @@ -172,6 +193,7 @@ struct KernelCore::Impl { std::vector<std::shared_ptr<Process>> process_list; Process* current_process = nullptr; Kernel::GlobalScheduler global_scheduler; + Kernel::Synchronization synchronization; std::shared_ptr<ResourceLimit> system_resource_limit; @@ -186,6 +208,9 @@ struct KernelCore::Impl { /// the ConnectToPort SVC. 
NamedPortTable named_ports; + std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; + std::vector<Kernel::PhysicalCore> cores; + // System context Core::System& system; }; @@ -240,6 +265,42 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } +Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { + return impl->cores[id]; +} + +const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { + return impl->cores[id]; +} + +Kernel::Synchronization& KernelCore::Synchronization() { + return impl->synchronization; +} + +const Kernel::Synchronization& KernelCore::Synchronization() const { + return impl->synchronization; +} + +Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() { + return *impl->exclusive_monitor; +} + +const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { + return *impl->exclusive_monitor; +} + +void KernelCore::InvalidateAllInstructionCaches() { + for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) { + PhysicalCore(i).ArmInterface().ClearInstructionCache(); + } +} + +void KernelCore::PrepareReschedule(std::size_t id) { + if (id < impl->global_scheduler.CpuCoresCount()) { + impl->cores[id].Stop(); + } +} + void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 3bf0068ed..1eede3063 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,8 +11,9 @@ #include "core/hle/kernel/object.h" namespace Core { +class ExclusiveMonitor; class System; -} +} // namespace Core namespace Core::Timing { class CoreTiming; @@ -25,8 +26,10 @@ class AddressArbiter; class ClientPort; class GlobalScheduler; class HandleTable; +class PhysicalCore; class Process; class ResourceLimit; +class Synchronization; class Thread; /// Represents a single instance of the kernel. @@ -84,6 +87,27 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets an instance of the respective physical CPU core. + Kernel::PhysicalCore& PhysicalCore(std::size_t id); + + /// Gets an instance of the respective physical CPU core. + const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; + + /// Gets an instance of the Synchronization interface. + Kernel::Synchronization& Synchronization(); + + /// Gets an instance of the Synchronization interface. + const Kernel::Synchronization& Synchronization() const; + + /// Stops execution of core 'id', in order to reschedule a new thread. + void PrepareReschedule(std::size_t id); + + Core::ExclusiveMonitor& GetExclusiveMonitor(); + + const Core::ExclusiveMonitor& GetExclusiveMonitor() const; + + void InvalidateAllInstructionCaches(); + + /// Adds a port to the named port table void AddNamedPort(std::string name, std::shared_ptr<ClientPort> port); diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp new file mode 100644 index 000000000..9303dd273 --- /dev/null +++ b/src/core/hle/kernel/physical_core.cpp @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
+ +#include "common/logging/log.h" +#include "core/arm/arm_interface.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" +#include "core/core.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, + Core::ExclusiveMonitor& exclusive_monitor) + : core_index{id} { +#ifdef ARCHITECTURE_x86_64 + arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); +#else + arm_interface = std::make_shared<Core::ARM_Unicorn>(system); + LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); +#endif + + scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); +} + +PhysicalCore::~PhysicalCore() = default; + +void PhysicalCore::Run() { + arm_interface->Run(); + arm_interface->ClearExclusiveState(); +} + +void PhysicalCore::Step() { + arm_interface->Step(); +} + +void PhysicalCore::Stop() { + arm_interface->PrepareReschedule(); +} + +void PhysicalCore::Shutdown() { + scheduler->Shutdown(); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h new file mode 100644 index 000000000..4c32c0f1b --- /dev/null +++ b/src/core/hle/kernel/physical_core.h @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <memory> + +namespace Kernel { +class Scheduler; +} // namespace Kernel + +namespace Core { +class ARM_Interface; +class ExclusiveMonitor; +class System; +} // namespace Core + +namespace Kernel { + +class PhysicalCore { +public: + PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor); + ~PhysicalCore(); + + PhysicalCore(const PhysicalCore&) = delete; + PhysicalCore& operator=(const PhysicalCore&) = delete; + + PhysicalCore(PhysicalCore&&) = default; + PhysicalCore& operator=(PhysicalCore&&) = default; + + /// Execute current jit state + void Run(); + /// Execute a single instruction in current jit. + void Step(); + /// Stop JIT execution/exit + void Stop(); + + // Shutdown this physical core. 
+ void Shutdown(); + + Core::ARM_Interface& ArmInterface() { + return *arm_interface; + } + + const Core::ARM_Interface& ArmInterface() const { + return *arm_interface; + } + + bool IsMainCore() const { + return core_index == 0; + } + + bool IsSystemCore() const { + return core_index == 3; + } + + std::size_t CoreIndex() const { + return core_index; + } + + Kernel::Scheduler& Scheduler() { + return *scheduler; + } + + const Kernel::Scheduler& Scheduler() const { + return *scheduler; + } + +private: + std::size_t core_index; + std::unique_ptr<Core::ARM_Interface> arm_interface; + std::unique_ptr<Kernel::Scheduler> scheduler; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index b9035a0be..2fcb7326c 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -337,7 +337,7 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { } Process::Process(Core::System& system) - : WaitObject{system.Kernel()}, vm_manager{system}, + : SynchronizationObject{system.Kernel()}, vm_manager{system}, address_arbiter{system}, mutex{system}, system{system} {} Process::~Process() = default; @@ -357,7 +357,7 @@ void Process::ChangeStatus(ProcessStatus new_status) { status = new_status; is_signaled = true; - WakeupAllWaitingThreads(); + Signal(); } void Process::AllocateMainThreadStack(u64 stack_size) { diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 3483fa19d..4887132a7 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -15,8 +15,8 @@ #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/process_capability.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/vm_manager.h" -#include "core/hle/kernel/wait_object.h" #include "core/hle/result.h" namespace Core { @@ -60,7 +60,7 @@ enum class ProcessStatus { DebugBreak, }; -class Process final : public WaitObject { +class Process final : public SynchronizationObject { public: explicit Process(Core::System& system); ~Process() override; @@ -359,10 +359,6 @@ private: /// specified by metadata provided to the process during loading. bool is_64bit_process = true; - /// Whether or not this process is signaled. This occurs - /// upon the process changing to a different state. - bool is_signaled = false; - /// Total running time for the process in ticks. 
u64 total_process_running_time_ticks = 0; diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index d8ac97aa1..9d3d3a81b 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -11,30 +11,30 @@ namespace Kernel { -ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {} +ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {} ReadableEvent::~ReadableEvent() = default; bool ReadableEvent::ShouldWait(const Thread* thread) const { - return !signaled; + return !is_signaled; } void ReadableEvent::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); + ASSERT_MSG(IsSignaled(), "object unavailable!"); } void ReadableEvent::Signal() { - if (!signaled) { - signaled = true; - WakeupAllWaitingThreads(); + if (!is_signaled) { + is_signaled = true; + SynchronizationObject::Signal(); }; } void ReadableEvent::Clear() { - signaled = false; + is_signaled = false; } ResultCode ReadableEvent::Reset() { - if (!signaled) { + if (!is_signaled) { return ERR_INVALID_STATE; } diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h index 11ff71c3a..3264dd066 100644 --- a/src/core/hle/kernel/readable_event.h +++ b/src/core/hle/kernel/readable_event.h @@ -5,7 +5,7 @@ #pragma once #include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" union ResultCode; @@ -14,7 +14,7 @@ namespace Kernel { class KernelCore; class WritableEvent; -class ReadableEvent final : public WaitObject { +class ReadableEvent final : public SynchronizationObject { friend class WritableEvent; public: @@ -46,13 +46,11 @@ public: /// then ERR_INVALID_STATE will be returned. ResultCode Reset(); + void Signal() override; + private: explicit ReadableEvent(KernelCore& kernel); - void Signal(); - - bool signaled{}; - std::string name; ///< Name of event (optional) }; diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index d36fcd7d9..86f1421bf 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -14,7 +14,6 @@ #include "common/logging/log.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/core_timing.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" @@ -125,8 +124,8 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { "Thread yielding without being in front"); scheduled_queue[core_id].yield(priority); - std::array<Thread*, NUM_CPU_CORES> current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { + std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; + for (std::size_t i = 0; i < current_threads.size(); i++) { current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); } @@ -178,8 +177,8 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread // function... if (scheduled_queue[core_id].empty()) { // Here, "current_threads" is calculated after the ""yield"", unlike yield -1 - std::array<Thread*, NUM_CPU_CORES> current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { + std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; + for (std::size_t i = 0; i < current_threads.size(); i++) { current_threads[i] = scheduled_queue[i].empty() ? 
nullptr : scheduled_queue[i].front(); } for (auto& thread : suggested_queue[core_id]) { @@ -209,7 +208,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } void GlobalScheduler::PreemptThreads() { - for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { + for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { const u32 priority = preemption_priorities[core_id]; if (scheduled_queue[core_id].size(priority) > 0) { @@ -350,7 +349,7 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, } void GlobalScheduler::Shutdown() { - for (std::size_t core = 0; core < NUM_CPU_CORES; core++) { + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { scheduled_queue[core].clear(); suggested_queue[core].clear(); } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 14b77960a..96db049cb 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/multi_level_queue.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/thread.h" namespace Core { @@ -23,8 +24,6 @@ class Process; class GlobalScheduler final { public: - static constexpr u32 NUM_CPU_CORES = 4; - explicit GlobalScheduler(Core::System& system); ~GlobalScheduler(); @@ -125,7 +124,7 @@ public: void PreemptThreads(); u32 CpuCoresCount() const { - return NUM_CPU_CORES; + return Core::Hardware::NUM_CPU_CORES; } void SetReselectionPending() { @@ -149,13 +148,15 @@ private: bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner); static constexpr u32 min_regular_priority = 2; - std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue; - std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue; + std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES> + scheduled_queue; + std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES> + suggested_queue; std::atomic<bool> is_reselection_pending{false}; // The priority levels at which the global scheduler preempts threads every 10 ms. They are // ordered from Core 0 to Core 3. - std::array<u32, NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; + std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; /// Lists all thread ids that aren't deleted/etc. 
std::vector<std::shared_ptr<Thread>> thread_list; diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index a4ccfa35e..a549ae9d7 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ServerPort::ServerPort(KernelCore& kernel) : WaitObject{kernel} {} +ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {} ServerPort::~ServerPort() = default; ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { @@ -39,6 +39,10 @@ void ServerPort::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); } +bool ServerPort::IsSignaled() const { + return !pending_sessions.empty(); +} + ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions, std::string name) { std::shared_ptr<ServerPort> server_port = std::make_shared<ServerPort>(kernel); diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h index 8be8a75ea..41b191b86 100644 --- a/src/core/hle/kernel/server_port.h +++ b/src/core/hle/kernel/server_port.h @@ -10,7 +10,7 @@ #include <vector> #include "common/common_types.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Kernel { @@ -20,7 +20,7 @@ class KernelCore; class ServerSession; class SessionRequestHandler; -class ServerPort final : public WaitObject { +class ServerPort final : public SynchronizationObject { public: explicit ServerPort(KernelCore& kernel); ~ServerPort() override; @@ -82,6 +82,8 @@ public: bool ShouldWait(const Thread* thread) const override; void Acquire(Thread* thread) override; + bool IsSignaled() const override; + private: /// ServerSessions waiting to be accepted by the port std::vector<std::shared_ptr<ServerSession>> pending_sessions; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 7825e1ec4..4604e35c5 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -24,7 +24,7 @@ namespace Kernel { -ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {} +ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} ServerSession::~ServerSession() = default; ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, @@ -50,6 +50,16 @@ bool ServerSession::ShouldWait(const Thread* thread) const { return pending_requesting_threads.empty() || currently_handling != nullptr; } +bool ServerSession::IsSignaled() const { + // Closed sessions should never wait, an error will be returned from svcReplyAndReceive. + if (!parent->Client()) { + return true; + } + + // Wait if we have no pending requests, or if we're currently handling a request. + return !pending_requesting_threads.empty() && currently_handling == nullptr; +} + void ServerSession::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); // We are now handling a request, pop it from the stack. 
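For context, the IsSignaled() overrides added above (ServerPort, ServerSession) feed into the Kernel::Synchronization::WaitFor interface introduced later in this changeset. The following is only an illustrative sketch of how a caller might wait on a set of synchronization objects through that interface; WaitOnObjects is a hypothetical helper, not part of this patch.

#include <memory>
#include <utility>
#include <vector>

#include "common/common_types.h"
#include "core/core.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/synchronization_object.h"

// Hypothetical helper (not part of this changeset): waits on a set of
// synchronization objects via the new Synchronization interface.
static std::pair<ResultCode, Kernel::Handle> WaitOnObjects(
    Core::System& system,
    std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& objects, s64 timeout_ns) {
    // WaitFor acquires the first object whose IsSignaled() returns true, or
    // suspends the current thread until one signals or timeout_ns expires,
    // mirroring the rewritten svcWaitSynchronization path in this changeset.
    return system.Kernel().Synchronization().WaitFor(objects, timeout_ns);
}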
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index d6e48109e..77e4f6721 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -10,7 +10,7 @@ #include <vector> #include "common/threadsafe_queue.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Memory { @@ -41,7 +41,7 @@ class Thread; * After the server replies to the request, the response is marshalled back to the caller's * TLS buffer and control is transferred back to it. */ -class ServerSession final : public WaitObject { +class ServerSession final : public SynchronizationObject { public: explicit ServerSession(KernelCore& kernel); ~ServerSession() override; @@ -73,6 +73,8 @@ public: return parent.get(); } + bool IsSignaled() const override; + /** * Sets the HLE handler for the session. This handler will be called to service IPC requests * instead of the regular IPC machinery. (The regular IPC machinery is currently not diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp index dee6e2b72..e4dd53e24 100644 --- a/src/core/hle/kernel/session.cpp +++ b/src/core/hle/kernel/session.cpp @@ -9,7 +9,7 @@ namespace Kernel { -Session::Session(KernelCore& kernel) : WaitObject{kernel} {} +Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {} Session::~Session() = default; Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { @@ -29,6 +29,11 @@ bool Session::ShouldWait(const Thread* thread) const { return {}; } +bool Session::IsSignaled() const { + UNIMPLEMENTED(); + return true; +} + void Session::Acquire(Thread* thread) { UNIMPLEMENTED(); } diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h index 15a5ac15f..7cd9c0d77 100644 --- a/src/core/hle/kernel/session.h +++ b/src/core/hle/kernel/session.h @@ -8,7 +8,7 @@ #include <string> #include <utility> -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" namespace Kernel { @@ -19,7 +19,7 @@ class ServerSession; * Parent structure to link the client and server endpoints of a session with their associated * client port. 
*/ -class Session final : public WaitObject { +class Session final : public SynchronizationObject { public: explicit Session(KernelCore& kernel); ~Session() override; @@ -39,6 +39,8 @@ public: bool ShouldWait(const Thread* thread) const override; + bool IsSignaled() const override; + void Acquire(Thread* thread) override; std::shared_ptr<ClientSession> Client() { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index dbcdb0b88..fd91779a3 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -15,7 +15,7 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" @@ -32,6 +32,7 @@ #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/svc_wrap.h" +#include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/transfer_memory.h" #include "core/hle/kernel/writable_event.h" @@ -433,22 +434,6 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han return ERR_INVALID_HANDLE; } -/// Default thread wakeup callback for WaitSynchronization -static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); - - if (reason == ThreadWakeupReason::Timeout) { - thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); - return true; - } - - ASSERT(reason == ThreadWakeupReason::Signal); - thread->SetWaitSynchronizationResult(RESULT_SUCCESS); - thread->SetWaitSynchronizationOutput(static_cast<u32>(index)); - return true; -}; - /// Wait for the given handles to synchronize, timeout after the specified nanoseconds static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address, u64 handle_count, s64 nano_seconds) { @@ -472,14 +457,14 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr } auto* const thread = system.CurrentScheduler().GetCurrentThread(); - - using ObjectPtr = Thread::ThreadWaitObjects::value_type; - Thread::ThreadWaitObjects objects(handle_count); - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto& kernel = system.Kernel(); + using ObjectPtr = Thread::ThreadSynchronizationObjects::value_type; + Thread::ThreadSynchronizationObjects objects(handle_count); + const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); for (u64 i = 0; i < handle_count; ++i) { const Handle handle = memory.Read32(handles_address + i * sizeof(Handle)); - const auto object = handle_table.Get<WaitObject>(handle); + const auto object = handle_table.Get<SynchronizationObject>(handle); if (object == nullptr) { LOG_ERROR(Kernel_SVC, "Object is a nullptr"); @@ -488,47 +473,10 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr objects[i] = object; } - - // Find the first object that is acquirable in the provided list of objects - auto itr = std::find_if(objects.begin(), objects.end(), [thread](const ObjectPtr& object) { - return !object->ShouldWait(thread); - }); - - if (itr != objects.end()) { - // We found a ready object, acquire it and set the result value - WaitObject* object = itr->get(); - object->Acquire(thread); - *index = 
static_cast<s32>(std::distance(objects.begin(), itr)); - return RESULT_SUCCESS; - } - - // No objects were ready to be acquired, prepare to suspend the thread. - - // If a timeout value of 0 was provided, just return the Timeout error code instead of - // suspending the thread. - if (nano_seconds == 0) { - return RESULT_TIMEOUT; - } - - if (thread->IsSyncCancelled()) { - thread->SetSyncCancelled(false); - return ERR_SYNCHRONIZATION_CANCELED; - } - - for (auto& object : objects) { - object->AddWaitingThread(SharedFrom(thread)); - } - - thread->SetWaitObjects(std::move(objects)); - thread->SetStatus(ThreadStatus::WaitSynch); - - // Create an event to wake the thread up after the specified nanosecond delay has passed - thread->WakeAfterDelay(nano_seconds); - thread->SetWakeupCallback(DefaultThreadWakeupCallback); - - system.PrepareReschedule(thread->GetProcessorID()); - - return RESULT_TIMEOUT; + auto& synchronization = kernel.Synchronization(); + const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds); + *index = handle_result; + return result; } /// Resumes a thread waiting on WaitSynchronization @@ -1863,10 +1811,14 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd } auto& kernel = system.Kernel(); - auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms); + auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms); + + if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) { + return reserve_result; + } auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); - const auto result = handle_table.Create(std::move(transfer_mem_handle)); + const auto result{handle_table.Create(std::move(transfer_mem_handle))}; if (result.Failed()) { return result.Code(); } diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp new file mode 100644 index 000000000..dc37fad1a --- /dev/null +++ b/src/core/hle/kernel/synchronization.cpp @@ -0,0 +1,87 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "core/core.h" +#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/synchronization.h" +#include "core/hle/kernel/synchronization_object.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +/// Default thread wakeup callback for WaitSynchronization +static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, + std::shared_ptr<SynchronizationObject> object, + std::size_t index) { + ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); + + if (reason == ThreadWakeupReason::Timeout) { + thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); + return true; + } + + ASSERT(reason == ThreadWakeupReason::Signal); + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); + thread->SetWaitSynchronizationOutput(static_cast<u32>(index)); + return true; +} + +Synchronization::Synchronization(Core::System& system) : system{system} {} + +void Synchronization::SignalObject(SynchronizationObject& obj) const { + if (obj.IsSignaled()) { + obj.WakeupAllWaitingThreads(); + } +} + +std::pair<ResultCode, Handle> Synchronization::WaitFor( + std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { + auto* const thread = system.CurrentScheduler().GetCurrentThread(); + // Find the first object that is acquirable in the provided list of objects + const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(), + [thread](const std::shared_ptr<SynchronizationObject>& object) { + return object->IsSignaled(); + }); + + if (itr != sync_objects.end()) { + // We found a ready object, acquire it and set the result value + SynchronizationObject* object = itr->get(); + object->Acquire(thread); + const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); + return {RESULT_SUCCESS, index}; + } + + // No objects were ready to be acquired, prepare to suspend the thread. + + // If a timeout value of 0 was provided, just return the Timeout error code instead of + // suspending the thread. + if (nano_seconds == 0) { + return {RESULT_TIMEOUT, InvalidHandle}; + } + + if (thread->IsSyncCancelled()) { + thread->SetSyncCancelled(false); + return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; + } + + for (auto& object : sync_objects) { + object->AddWaitingThread(SharedFrom(thread)); + } + + thread->SetSynchronizationObjects(std::move(sync_objects)); + thread->SetStatus(ThreadStatus::WaitSynch); + + // Create an event to wake the thread up after the specified nanosecond delay has passed + thread->WakeAfterDelay(nano_seconds); + thread->SetWakeupCallback(DefaultThreadWakeupCallback); + + system.PrepareReschedule(thread->GetProcessorID()); + + return {RESULT_TIMEOUT, InvalidHandle}; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/synchronization.h b/src/core/hle/kernel/synchronization.h new file mode 100644 index 000000000..379f4b1d3 --- /dev/null +++ b/src/core/hle/kernel/synchronization.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <memory> +#include <utility> +#include <vector> + +#include "core/hle/kernel/object.h" +#include "core/hle/result.h" + +namespace Core { +class System; +} // namespace Core + +namespace Kernel { + +class SynchronizationObject; + +/** + * The 'Synchronization' class is an interface for handling synchronization methods + * used by Synchronization objects and synchronization SVCs. This centralizes the processing of + * such waits and signals. + */ +class Synchronization { +public: + explicit Synchronization(Core::System& system); + + /// Signals a synchronization object, waking up all its waiting threads + void SignalObject(SynchronizationObject& obj) const; + + /// Checks whether waiting on any of the sync_objects is necessary. If not, + /// it returns Success and the handle index of the signaled sync object. + /// Otherwise, the current thread is put to sleep until nano_seconds elapse or + /// one of the synchronization objects is signaled. + std::pair<ResultCode, Handle> WaitFor( + std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds); + +private: + Core::System& system; +}; +} // namespace Kernel diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/synchronization_object.cpp index 745f2c4e8..43f3eef18 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/synchronization_object.cpp @@ -7,24 +7,29 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/synchronization.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" namespace Kernel { -WaitObject::WaitObject(KernelCore& kernel) : Object{kernel} {} -WaitObject::~WaitObject() = default; +SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {} +SynchronizationObject::~SynchronizationObject() = default; -void WaitObject::AddWaitingThread(std::shared_ptr<Thread> thread) { +void SynchronizationObject::Signal() { + kernel.Synchronization().SignalObject(*this); +} + +void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) { auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread); if (itr == waiting_threads.end()) waiting_threads.push_back(std::move(thread)); } -void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { +void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread); // If a thread passed multiple handles to the same object, // the kernel might attempt to remove the thread from the object's @@ -33,7 +38,7 @@ void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { waiting_threads.erase(itr); } -std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const { +std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const { Thread* candidate = nullptr; u32 candidate_priority = THREADPRIO_LOWEST + 1; @@ -51,23 +56,14 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const { if (ShouldWait(thread.get())) continue; - // A thread is ready to run if it's either in ThreadStatus::WaitSynch - // and the rest of the objects it is waiting on are ready.
- bool ready_to_run = true; - if (thread_status == ThreadStatus::WaitSynch) { - ready_to_run = thread->AllWaitObjectsReady(); - } - - if (ready_to_run) { - candidate = thread.get(); - candidate_priority = thread->GetPriority(); - } + candidate = thread.get(); + candidate_priority = thread->GetPriority(); } return SharedFrom(candidate); } -void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { +void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { ASSERT(!ShouldWait(thread.get())); if (!thread) { @@ -75,7 +71,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { } if (thread->IsSleepingOnWait()) { - for (const auto& object : thread->GetWaitObjects()) { + for (const auto& object : thread->GetSynchronizationObjects()) { ASSERT(!object->ShouldWait(thread.get())); object->Acquire(thread.get()); } @@ -83,9 +79,9 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { Acquire(thread.get()); } - const std::size_t index = thread->GetWaitObjectIndex(SharedFrom(this)); + const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this)); - thread->ClearWaitObjects(); + thread->ClearSynchronizationObjects(); thread->CancelWakeupTimer(); @@ -96,17 +92,17 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { } if (resume) { thread->ResumeFromWait(); - Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); + kernel.PrepareReschedule(thread->GetProcessorID()); } } -void WaitObject::WakeupAllWaitingThreads() { +void SynchronizationObject::WakeupAllWaitingThreads() { while (auto thread = GetHighestPriorityReadyThread()) { WakeupWaitingThread(thread); } } -const std::vector<std::shared_ptr<Thread>>& WaitObject::GetWaitingThreads() const { +const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const { return waiting_threads; } diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/synchronization_object.h index 9a17958a4..741c31faf 100644 --- a/src/core/hle/kernel/wait_object.h +++ b/src/core/hle/kernel/synchronization_object.h @@ -15,10 +15,10 @@ class KernelCore; class Thread; /// Class that represents a Kernel object that a thread can be waiting on -class WaitObject : public Object { +class SynchronizationObject : public Object { public: - explicit WaitObject(KernelCore& kernel); - ~WaitObject() override; + explicit SynchronizationObject(KernelCore& kernel); + ~SynchronizationObject() override; /** * Check if the specified thread should wait until the object is available @@ -30,6 +30,13 @@ public: /// Acquire/lock the object for the specified thread if it is available virtual void Acquire(Thread* thread) = 0; + /// Signal this object + virtual void Signal(); + + virtual bool IsSignaled() const { + return is_signaled; + } + /** * Add a thread to wait on this object * @param thread Pointer to thread to add @@ -60,16 +67,20 @@ public: /// Get a const reference to the waiting threads list for debug use const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const; +protected: + bool is_signaled{}; // Tells if this sync object is signalled; + private: /// Threads waiting for this object to become available std::vector<std::shared_ptr<Thread>> waiting_threads; }; -// Specialization of DynamicObjectCast for WaitObjects +// Specialization of DynamicObjectCast for SynchronizationObjects template <> -inline std::shared_ptr<WaitObject> DynamicObjectCast<WaitObject>(std::shared_ptr<Object> object) { +inline 
std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>( + std::shared_ptr<Object> object) { if (object != nullptr && object->IsWaitable()) { - return std::static_pointer_cast<WaitObject>(object); + return std::static_pointer_cast<SynchronizationObject>(object); } return nullptr; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index e84e5ce0d..ae5f2c8bd 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -13,9 +13,9 @@ #include "common/thread_queue_list.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" @@ -32,11 +32,15 @@ bool Thread::ShouldWait(const Thread* thread) const { return status != ThreadStatus::Dead; } +bool Thread::IsSignaled() const { + return status == ThreadStatus::Dead; +} + void Thread::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); } -Thread::Thread(KernelCore& kernel) : WaitObject{kernel} {} +Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {} Thread::~Thread() = default; void Thread::Stop() { @@ -46,7 +50,7 @@ void Thread::Stop() { kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; SetStatus(ThreadStatus::Dead); - WakeupAllWaitingThreads(); + Signal(); // Clean up any dangling references in objects that this thread was waiting for for (auto& wait_object : wait_objects) { @@ -216,7 +220,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { context.cpu_registers[1] = output; } -s32 Thread::GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const { +s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); @@ -337,14 +341,16 @@ void Thread::ChangeCore(u32 core, u64 mask) { SetCoreAndAffinityMask(core, mask); } -bool Thread::AllWaitObjectsReady() const { - return std::none_of( - wait_objects.begin(), wait_objects.end(), - [this](const std::shared_ptr<WaitObject>& object) { return object->ShouldWait(this); }); +bool Thread::AllSynchronizationObjectsReady() const { + return std::none_of(wait_objects.begin(), wait_objects.end(), + [this](const std::shared_ptr<SynchronizationObject>& object) { + return object->ShouldWait(this); + }); } bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index) { + std::shared_ptr<SynchronizationObject> object, + std::size_t index) { ASSERT(wakeup_callback); return wakeup_callback(reason, std::move(thread), std::move(object), index); } @@ -356,7 +362,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); + kernel.PrepareReschedule(processor_id); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule @@ -426,7 +432,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { const s32 old_core = processor_id; if 
(processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { if (static_cast<s32>(ideal_core) < 0) { - processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES); + processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES); } else { processor_id = ideal_core; } @@ -450,7 +456,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(current_priority, core, this); } @@ -461,7 +467,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } @@ -475,12 +481,12 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(old_priority, core, this); } @@ -497,7 +503,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } @@ -507,13 +513,13 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (((old_affinity_mask >> core) & 1) != 0) { if (core == static_cast<u32>(old_core)) { scheduler.Unschedule(current_priority, core, this); @@ -523,7 +529,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { } } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (((affinity_mask >> core) & 1) != 0) { if (core == static_cast<u32>(processor_id)) { scheduler.Schedule(current_priority, core, this); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 3bcf9e137..7a4916318 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" 
+#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Kernel { @@ -95,7 +95,7 @@ enum class ThreadSchedMasks : u32 { ForcePauseMask = 0x0070, }; -class Thread final : public WaitObject { +class Thread final : public SynchronizationObject { public: explicit Thread(KernelCore& kernel); ~Thread() override; @@ -104,11 +104,11 @@ public: using ThreadContext = Core::ARM_Interface::ThreadContext; - using ThreadWaitObjects = std::vector<std::shared_ptr<WaitObject>>; + using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; using WakeupCallback = std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index)>; + std::shared_ptr<SynchronizationObject> object, std::size_t index)>; /** * Creates and returns a new thread. The new thread is immediately scheduled @@ -146,6 +146,7 @@ public: bool ShouldWait(const Thread* thread) const override; void Acquire(Thread* thread) override; + bool IsSignaled() const override; /** * Gets the thread's current priority @@ -233,7 +234,7 @@ public: * * @param object Object to query the index of. */ - s32 GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const; + s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const; /** * Stops a thread, invalidating it from further use @@ -314,15 +315,15 @@ public: return owner_process; } - const ThreadWaitObjects& GetWaitObjects() const { + const ThreadSynchronizationObjects& GetSynchronizationObjects() const { return wait_objects; } - void SetWaitObjects(ThreadWaitObjects objects) { + void SetSynchronizationObjects(ThreadSynchronizationObjects objects) { wait_objects = std::move(objects); } - void ClearWaitObjects() { + void ClearSynchronizationObjects() { for (const auto& waiting_object : wait_objects) { waiting_object->RemoveWaitingThread(SharedFrom(this)); } @@ -330,7 +331,7 @@ public: } /// Determines whether all the objects this thread is waiting on are ready. - bool AllWaitObjectsReady() const; + bool AllSynchronizationObjectsReady() const; const MutexWaitingThreads& GetMutexWaitingThreads() const { return wait_mutex_threads; @@ -395,7 +396,7 @@ public: * will cause an assertion to trigger. */ bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index); + std::shared_ptr<SynchronizationObject> object, std::size_t index); u32 GetIdealCore() const { return ideal_core; @@ -494,7 +495,7 @@ private: /// Objects that the thread is waiting on, in the same order as they were /// passed to WaitSynchronization. - ThreadWaitObjects wait_objects; + ThreadSynchronizationObjects wait_objects; /// List of threads that are waiting for a mutex that is held by this thread. 
MutexWaitingThreads wait_mutex_threads; diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index f0e73f57b..f2d3f8b49 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp @@ -8,15 +8,23 @@ #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/transfer_memory.h" #include "core/hle/result.h" +#include "core/memory.h" namespace Kernel { -TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {} -TransferMemory::~TransferMemory() = default; +TransferMemory::TransferMemory(KernelCore& kernel, Memory::Memory& memory) + : Object{kernel}, memory{memory} {} -std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address, - u64 size, MemoryPermission permissions) { - std::shared_ptr<TransferMemory> transfer_memory{std::make_shared<TransferMemory>(kernel)}; +TransferMemory::~TransferMemory() { + // Release memory region when transfer memory is destroyed + Reset(); +} + +std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, Memory::Memory& memory, + VAddr base_address, u64 size, + MemoryPermission permissions) { + std::shared_ptr<TransferMemory> transfer_memory{ + std::make_shared<TransferMemory>(kernel, memory)}; transfer_memory->base_address = base_address; transfer_memory->memory_size = size; @@ -27,7 +35,7 @@ std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr } const u8* TransferMemory::GetPointer() const { - return backing_block.get()->data(); + return memory.GetPointer(base_address); } u64 TransferMemory::GetSize() const { @@ -62,6 +70,52 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p return RESULT_SUCCESS; } +ResultCode TransferMemory::Reserve() { + auto& vm_manager{owner_process->VMManager()}; + const auto check_range_result{vm_manager.CheckRangeState( + base_address, memory_size, MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, + MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::All, + VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None, + MemoryAttribute::IpcAndDeviceMapped)}; + + if (check_range_result.Failed()) { + return check_range_result.Code(); + } + + auto [state_, permissions_, attribute] = *check_range_result; + + if (const auto result{vm_manager.ReprotectRange( + base_address, memory_size, SharedMemory::ConvertPermissions(owner_permissions))}; + result.IsError()) { + return result; + } + + return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask, + attribute | MemoryAttribute::Locked); +} + +ResultCode TransferMemory::Reset() { + auto& vm_manager{owner_process->VMManager()}; + if (const auto result{vm_manager.CheckRangeState( + base_address, memory_size, + MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, + MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::None, + VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked, + MemoryAttribute::IpcAndDeviceMapped)}; + result.Failed()) { + return result.Code(); + } + + if (const auto result{ + vm_manager.ReprotectRange(base_address, memory_size, VMAPermission::ReadWrite)}; + result.IsError()) { + return result; + } + + return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask, + MemoryAttribute::None); +} + ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) { if (memory_size != size) { return ERR_INVALID_SIZE; 
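As an aside, the Reserve()/Reset() pair added to TransferMemory above is driven from the svcCreateTransferMemory change earlier in this diff. The sketch below shows roughly how that lifecycle fits together using only the interfaces shown in this changeset; MakeTransferMemory is a hypothetical helper, not part of this patch.

#include <memory>
#include <utility>

#include "common/common_types.h"
#include "core/core.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/transfer_memory.h"

// Hypothetical helper (illustration only): create a transfer memory region,
// reserve its backing range, and publish a handle for it.
static ResultCode MakeTransferMemory(Core::System& system, Kernel::Handle* out_handle, VAddr addr,
                                     u64 size, Kernel::MemoryPermission perms) {
    auto& kernel = system.Kernel();
    auto transfer_mem = Kernel::TransferMemory::Create(kernel, system.Memory(), addr, size, perms);

    // Reserve() locks and reprotects the backing region; Reset() (invoked from
    // the destructor) releases it again once the last reference goes away.
    if (const auto reserve_result = transfer_mem->Reserve(); reserve_result.IsError()) {
        return reserve_result;
    }

    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
    const auto result = handle_table.Create(std::move(transfer_mem));
    if (result.Failed()) {
        return result.Code();
    }

    *out_handle = *result;
    return RESULT_SUCCESS;
}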
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index 0a6e15d18..6e388536a 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h @@ -11,6 +11,10 @@ union ResultCode; +namespace Memory { +class Memory; +} + namespace Kernel { class KernelCore; @@ -26,12 +30,13 @@ enum class MemoryPermission : u32; /// class TransferMemory final : public Object { public: - explicit TransferMemory(KernelCore& kernel); + explicit TransferMemory(KernelCore& kernel, Memory::Memory& memory); ~TransferMemory() override; static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory; - static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size, + static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Memory::Memory& memory, + VAddr base_address, u64 size, MemoryPermission permissions); TransferMemory(const TransferMemory&) = delete; @@ -80,6 +85,14 @@ public: /// ResultCode UnmapMemory(VAddr address, u64 size); + /// Reserves the region to be used for the transfer memory, called after the transfer memory is + /// created. + ResultCode Reserve(); + + /// Resets the region previously used for the transfer memory, called after the transfer memory + /// is closed. + ResultCode Reset(); + private: /// Memory block backing this instance. std::shared_ptr<PhysicalMemory> backing_block; @@ -98,6 +111,8 @@ private: /// Whether or not this transfer memory instance has mapped memory. bool is_mapped = false; + + Memory::Memory& memory; }; } // namespace Kernel diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 0b3500fce..024c22901 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -544,7 +544,8 @@ MemoryInfo VMManager::QueryMemory(VAddr address) const { ResultCode VMManager::SetMemoryAttribute(VAddr address, u64 size, MemoryAttribute mask, MemoryAttribute attribute) { - constexpr auto ignore_mask = MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped; + constexpr auto ignore_mask = + MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped | MemoryAttribute::Locked; constexpr auto attribute_mask = ~ignore_mask; const auto result = CheckRangeState( diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 850a7ebc3..90b4b006a 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -98,6 +98,8 @@ enum class MemoryAttribute : u32 { DeviceMapped = 4, /// Uncached memory Uncached = 8, + + IpcAndDeviceMapped = LockedForIPC | DeviceMapped, }; constexpr MemoryAttribute operator|(MemoryAttribute lhs, MemoryAttribute rhs) { @@ -654,6 +656,35 @@ public: /// is scheduled. Common::PageTable page_table{Memory::PAGE_BITS}; + using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>; + + /// Checks if an address range adheres to the specified states provided. + /// + /// @param address The starting address of the address range. + /// @param size The size of the address range. + /// @param state_mask The memory state mask. + /// @param state The state to compare the individual VMA states against, + /// which is done in the form of: (vma.state & state_mask) != state. + /// @param permission_mask The memory permissions mask. + /// @param permissions The permission to compare the individual VMA permissions against, + /// which is done in the form of: + /// (vma.permission & permission_mask) != permission. 
+ /// @param attribute_mask The memory attribute mask. + /// @param attribute The memory attributes to compare the individual VMA attributes + /// against, which is done in the form of: + /// (vma.attributes & attribute_mask) != attribute. + /// @param ignore_mask The memory attributes to ignore during the check. + /// + /// @returns If successful, returns a tuple containing the memory attributes + /// (with ignored bits specified by ignore_mask unset), memory permissions, and + /// memory state across the memory range. + /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE. + /// + CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state, + VMAPermission permission_mask, VMAPermission permissions, + MemoryAttribute attribute_mask, MemoryAttribute attribute, + MemoryAttribute ignore_mask) const; + private: using VMAIter = VMAMap::iterator; @@ -707,35 +738,6 @@ private: /// Clears out the page table void ClearPageTable(); - using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>; - - /// Checks if an address range adheres to the specified states provided. - /// - /// @param address The starting address of the address range. - /// @param size The size of the address range. - /// @param state_mask The memory state mask. - /// @param state The state to compare the individual VMA states against, - /// which is done in the form of: (vma.state & state_mask) != state. - /// @param permission_mask The memory permissions mask. - /// @param permissions The permission to compare the individual VMA permissions against, - /// which is done in the form of: - /// (vma.permission & permission_mask) != permission. - /// @param attribute_mask The memory attribute mask. - /// @param attribute The memory attributes to compare the individual VMA attributes - /// against, which is done in the form of: - /// (vma.attributes & attribute_mask) != attribute. - /// @param ignore_mask The memory attributes to ignore during the check. - /// - /// @returns If successful, returns a tuple containing the memory attributes - /// (with ignored bits specified by ignore_mask unset), memory permissions, and - /// memory state across the memory range. - /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE. - /// - CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state, - VMAPermission permission_mask, VMAPermission permissions, - MemoryAttribute attribute_mask, MemoryAttribute attribute, - MemoryAttribute ignore_mask) const; - /// Gets the amount of memory currently mapped (state != Unmapped) in a range. 
ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; diff --git a/src/core/hle/kernel/writable_event.cpp b/src/core/hle/kernel/writable_event.cpp index c9332e3e1..fc2f7c424 100644 --- a/src/core/hle/kernel/writable_event.cpp +++ b/src/core/hle/kernel/writable_event.cpp @@ -22,7 +22,6 @@ EventPair WritableEvent::CreateEventPair(KernelCore& kernel, std::string name) { writable_event->name = name + ":Writable"; writable_event->readable = readable_event; readable_event->name = name + ":Readable"; - readable_event->signaled = false; return {std::move(readable_event), std::move(writable_event)}; } @@ -40,7 +39,7 @@ void WritableEvent::Clear() { } bool WritableEvent::IsSignaled() const { - return readable->signaled; + return readable->IsSignaled(); } } // namespace Kernel diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 95aa5d23d..cc978713b 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -709,8 +709,34 @@ void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) { apm_sys->SetCpuBoostMode(ctx); } -IStorage::IStorage(std::vector<u8> buffer) - : ServiceFramework("IStorage"), buffer(std::move(buffer)) { +IStorageImpl::~IStorageImpl() = default; + +class StorageDataImpl final : public IStorageImpl { +public: + explicit StorageDataImpl(std::vector<u8>&& buffer) : buffer{std::move(buffer)} {} + + std::vector<u8>& GetData() override { + return buffer; + } + + const std::vector<u8>& GetData() const override { + return buffer; + } + + std::size_t GetSize() const override { + return buffer.size(); + } + +private: + std::vector<u8> buffer; +}; + +IStorage::IStorage(std::vector<u8>&& buffer) + : ServiceFramework("IStorage"), impl{std::make_shared<StorageDataImpl>(std::move(buffer))} { + Register(); +} + +void IStorage::Register() { // clang-format off static const FunctionInfo functions[] = { {0, &IStorage::Open, "Open"}, @@ -723,8 +749,13 @@ IStorage::IStorage(std::vector<u8> buffer) IStorage::~IStorage() = default; -const std::vector<u8>& IStorage::GetData() const { - return buffer; +void IStorage::Open(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_AM, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IStorageAccessor>(*this); } void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) { @@ -816,7 +847,7 @@ private: LOG_DEBUG(Service_AM, "called"); IPC::RequestParser rp{ctx}; - applet->GetBroker().PushNormalDataFromGame(*rp.PopIpcInterface<IStorage>()); + applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>()); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -825,26 +856,25 @@ private: void PopOutData(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); - IPC::ResponseBuilder rb{ctx, 2, 0, 1}; - const auto storage = applet->GetBroker().PopNormalDataToGame(); if (storage == nullptr) { LOG_ERROR(Service_AM, "storage is a nullptr. 
There is no data in the current normal channel"); - + IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_NO_DATA_IN_CHANNEL); return; } + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IStorage>(std::move(*storage)); + rb.PushIpcInterface<IStorage>(std::move(storage)); } void PushInteractiveInData(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); IPC::RequestParser rp{ctx}; - applet->GetBroker().PushInteractiveDataFromGame(*rp.PopIpcInterface<IStorage>()); + applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>()); ASSERT(applet->IsInitialized()); applet->ExecuteInteractive(); @@ -857,19 +887,18 @@ private: void PopInteractiveOutData(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); - IPC::ResponseBuilder rb{ctx, 2, 0, 1}; - const auto storage = applet->GetBroker().PopInteractiveDataToGame(); if (storage == nullptr) { LOG_ERROR(Service_AM, "storage is a nullptr. There is no data in the current interactive channel"); - + IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_NO_DATA_IN_CHANNEL); return; } + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IStorage>(std::move(*storage)); + rb.PushIpcInterface<IStorage>(std::move(storage)); } void GetPopOutDataEvent(Kernel::HLERequestContext& ctx) { @@ -891,15 +920,6 @@ private: std::shared_ptr<Applets::Applet> applet; }; -void IStorage::Open(Kernel::HLERequestContext& ctx) { - LOG_DEBUG(Service_AM, "called"); - - IPC::ResponseBuilder rb{ctx, 2, 0, 1}; - - rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IStorageAccessor>(*this); -} - IStorageAccessor::IStorageAccessor(IStorage& storage) : ServiceFramework("IStorageAccessor"), backing(storage) { // clang-format off @@ -921,7 +941,7 @@ void IStorageAccessor::GetSize(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 4}; rb.Push(RESULT_SUCCESS); - rb.Push(static_cast<u64>(backing.buffer.size())); + rb.Push(static_cast<u64>(backing.GetSize())); } void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) { @@ -932,17 +952,17 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size()); - if (data.size() > backing.buffer.size() - offset) { + if (data.size() > backing.GetSize() - offset) { LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}", - backing.buffer.size(), data.size(), offset); + backing.GetSize(), data.size(), offset); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_SIZE_OUT_OF_BOUNDS); return; } - std::memcpy(backing.buffer.data() + offset, data.data(), data.size()); + std::memcpy(backing.GetData().data() + offset, data.data(), data.size()); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -956,16 +976,16 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size); - if (size > backing.buffer.size() - offset) { + if (size > backing.GetSize() - offset) { LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}", - backing.buffer.size(), size, offset); + backing.GetSize(), size, offset); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_SIZE_OUT_OF_BOUNDS); return; } - ctx.WriteBuffer(backing.buffer.data() + offset, size); + ctx.WriteBuffer(backing.GetData().data() + offset, size); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -1031,7 +1051,7 @@ void 
ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex rp.SetCurrentOffset(3); const auto handle{rp.Pop<Kernel::Handle>()}; - const auto transfer_mem = + auto transfer_mem = system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle); if (transfer_mem == nullptr) { @@ -1047,7 +1067,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory))); + rb.PushIpcInterface<IStorage>(std::move(memory)); } IApplicationFunctions::IApplicationFunctions(Core::System& system_) @@ -1189,13 +1209,11 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { u64 build_id{}; std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); - const auto data = - backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id}); - + auto data = backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id}); if (data.has_value()) { IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<AM::IStorage>(*data); + rb.PushIpcInterface<IStorage>(std::move(*data)); launch_popped_application_specific = true; return; } @@ -1218,7 +1236,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { std::vector<u8> buffer(sizeof(LaunchParameterAccountPreselectedUser)); std::memcpy(buffer.data(), ¶ms, buffer.size()); - rb.PushIpcInterface<AM::IStorage>(buffer); + rb.PushIpcInterface<IStorage>(std::move(buffer)); launch_popped_account_preselect = true; return; } diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 448817be9..0b9a4332d 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -12,7 +12,8 @@ namespace Kernel { class KernelCore; -} +class TransferMemory; +} // namespace Kernel namespace Service::NVFlinger { class NVFlinger; @@ -188,19 +189,36 @@ private: std::shared_ptr<AppletMessageQueue> msg_queue; }; +class IStorageImpl { +public: + virtual ~IStorageImpl(); + virtual std::vector<u8>& GetData() = 0; + virtual const std::vector<u8>& GetData() const = 0; + virtual std::size_t GetSize() const = 0; +}; + class IStorage final : public ServiceFramework<IStorage> { public: - explicit IStorage(std::vector<u8> buffer); + explicit IStorage(std::vector<u8>&& buffer); ~IStorage() override; - const std::vector<u8>& GetData() const; + std::vector<u8>& GetData() { + return impl->GetData(); + } + + const std::vector<u8>& GetData() const { + return impl->GetData(); + } + + std::size_t GetSize() const { + return impl->GetSize(); + } private: + void Register(); void Open(Kernel::HLERequestContext& ctx); - std::vector<u8> buffer; - - friend class IStorageAccessor; + std::shared_ptr<IStorageImpl> impl; }; class IStorageAccessor final : public ServiceFramework<IStorageAccessor> { diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp index 92f995f8f..c3261f3e6 100644 --- a/src/core/hle/service/am/applets/applets.cpp +++ b/src/core/hle/service/am/applets/applets.cpp @@ -50,16 +50,17 @@ AppletDataBroker::RawChannelData AppletDataBroker::PeekDataToAppletForDebug() co return {std::move(out_normal), std::move(out_interactive)}; } -std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() { +std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() { if (out_channel.empty()) return nullptr; auto out = std::move(out_channel.front()); 
out_channel.pop_front(); + pop_out_data_event.writable->Clear(); return out; } -std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() { +std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() { if (in_channel.empty()) return nullptr; @@ -68,16 +69,17 @@ std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() { return out; } -std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() { +std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() { if (out_interactive_channel.empty()) return nullptr; auto out = std::move(out_interactive_channel.front()); out_interactive_channel.pop_front(); + pop_interactive_out_data_event.writable->Clear(); return out; } -std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() { +std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() { if (in_interactive_channel.empty()) return nullptr; @@ -86,21 +88,21 @@ std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() { return out; } -void AppletDataBroker::PushNormalDataFromGame(IStorage storage) { - in_channel.push_back(std::make_unique<IStorage>(storage)); +void AppletDataBroker::PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage) { + in_channel.emplace_back(std::move(storage)); } -void AppletDataBroker::PushNormalDataFromApplet(IStorage storage) { - out_channel.push_back(std::make_unique<IStorage>(storage)); +void AppletDataBroker::PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage) { + out_channel.emplace_back(std::move(storage)); pop_out_data_event.writable->Signal(); } -void AppletDataBroker::PushInteractiveDataFromGame(IStorage storage) { - in_interactive_channel.push_back(std::make_unique<IStorage>(storage)); +void AppletDataBroker::PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage) { + in_interactive_channel.emplace_back(std::move(storage)); } -void AppletDataBroker::PushInteractiveDataFromApplet(IStorage storage) { - out_interactive_channel.push_back(std::make_unique<IStorage>(storage)); +void AppletDataBroker::PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage) { + out_interactive_channel.emplace_back(std::move(storage)); pop_interactive_out_data_event.writable->Signal(); } diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index 16e61fc6f..e75be86a2 100644 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h @@ -72,17 +72,17 @@ public: // Retrieves but does not pop the data sent to applet. 
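The AM changes above move the applet data broker from by-value IStorage copies to std::shared_ptr<IStorage>, and clear the matching pop-data event whenever an entry is taken off a channel. A simplified sketch of that queue pattern, using stand-in Event and Storage types in place of Kernel::EventPair and Service::AM::IStorage:

    #include <deque>
    #include <memory>
    #include <vector>

    // Stand-ins for the kernel event pair and the AM storage object.
    struct Event {
        bool signaled = false;
        void Signal() { signaled = true; }
        void Clear() { signaled = false; }
    };

    struct Storage {
        std::vector<unsigned char> data;
    };

    class Channel {
    public:
        // Shared ownership lets the same Storage be handed to an IPC session without
        // copying its buffer; pushing signals the consumer-side event.
        void Push(std::shared_ptr<Storage> storage) {
            queue.emplace_back(std::move(storage));
            event.Signal();
        }

        // Popping hands the front element back and clears the event, mirroring
        // PopNormalDataToGame above; a null result means the channel is empty.
        std::shared_ptr<Storage> Pop() {
            if (queue.empty()) {
                return nullptr;
            }
            auto out = std::move(queue.front());
            queue.pop_front();
            event.Clear();
            return out;
        }

    private:
        std::deque<std::shared_ptr<Storage>> queue;
        Event event;
    };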
RawChannelData PeekDataToAppletForDebug() const; - std::unique_ptr<IStorage> PopNormalDataToGame(); - std::unique_ptr<IStorage> PopNormalDataToApplet(); + std::shared_ptr<IStorage> PopNormalDataToGame(); + std::shared_ptr<IStorage> PopNormalDataToApplet(); - std::unique_ptr<IStorage> PopInteractiveDataToGame(); - std::unique_ptr<IStorage> PopInteractiveDataToApplet(); + std::shared_ptr<IStorage> PopInteractiveDataToGame(); + std::shared_ptr<IStorage> PopInteractiveDataToApplet(); - void PushNormalDataFromGame(IStorage storage); - void PushNormalDataFromApplet(IStorage storage); + void PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage); + void PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage); - void PushInteractiveDataFromGame(IStorage storage); - void PushInteractiveDataFromApplet(IStorage storage); + void PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage); + void PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage); void SignalStateChanged() const; @@ -94,16 +94,16 @@ private: // Queues are named from applet's perspective // PopNormalDataToApplet and PushNormalDataFromGame - std::deque<std::unique_ptr<IStorage>> in_channel; + std::deque<std::shared_ptr<IStorage>> in_channel; // PopNormalDataToGame and PushNormalDataFromApplet - std::deque<std::unique_ptr<IStorage>> out_channel; + std::deque<std::shared_ptr<IStorage>> out_channel; // PopInteractiveDataToApplet and PushInteractiveDataFromGame - std::deque<std::unique_ptr<IStorage>> in_interactive_channel; + std::deque<std::shared_ptr<IStorage>> in_interactive_channel; // PopInteractiveDataToGame and PushInteractiveDataFromApplet - std::deque<std::unique_ptr<IStorage>> out_interactive_channel; + std::deque<std::shared_ptr<IStorage>> out_interactive_channel; Kernel::EventPair state_changed_event; diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp index eab0d42c9..f12fd7f89 100644 --- a/src/core/hle/service/am/applets/error.cpp +++ b/src/core/hle/service/am/applets/error.cpp @@ -186,7 +186,7 @@ void Error::Execute() { void Error::DisplayCompleted() { complete = true; - broker.PushNormalDataFromApplet(IStorage{{}}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{})); broker.SignalStateChanged(); } diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp index 328438a1d..104501ac5 100644 --- a/src/core/hle/service/am/applets/general_backend.cpp +++ b/src/core/hle/service/am/applets/general_backend.cpp @@ -20,7 +20,7 @@ namespace Service::AM::Applets { constexpr ResultCode ERROR_INVALID_PIN{ErrorModule::PCTL, 221}; static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) { - std::unique_ptr<IStorage> storage = broker.PopNormalDataToApplet(); + std::shared_ptr<IStorage> storage = broker.PopNormalDataToApplet(); for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) { const auto data = storage->GetData(); LOG_INFO(Service_AM, @@ -148,7 +148,7 @@ void Auth::AuthFinished(bool successful) { std::vector<u8> out(sizeof(Return)); std::memcpy(out.data(), &return_, sizeof(Return)); - broker.PushNormalDataFromApplet(IStorage{out}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(out))); broker.SignalStateChanged(); } @@ -198,7 +198,7 @@ void PhotoViewer::Execute() { } void PhotoViewer::ViewFinished() { - broker.PushNormalDataFromApplet(IStorage{{}}); + 
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{})); broker.SignalStateChanged(); } @@ -234,8 +234,8 @@ void StubApplet::ExecuteInteractive() { LOG_WARNING(Service_AM, "called (STUBBED)"); LogCurrentStorage(broker, "ExecuteInteractive"); - broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)}); - broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000))); + broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000))); broker.SignalStateChanged(); } @@ -243,8 +243,8 @@ void StubApplet::Execute() { LOG_WARNING(Service_AM, "called (STUBBED)"); LogCurrentStorage(broker, "Execute"); - broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)}); - broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000))); + broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000))); broker.SignalStateChanged(); } diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp index 3eba696ca..70cc23552 100644 --- a/src/core/hle/service/am/applets/profile_select.cpp +++ b/src/core/hle/service/am/applets/profile_select.cpp @@ -50,7 +50,7 @@ void ProfileSelect::ExecuteInteractive() { void ProfileSelect::Execute() { if (complete) { - broker.PushNormalDataFromApplet(IStorage{final_data}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data))); return; } @@ -71,7 +71,7 @@ void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) { final_data = std::vector<u8>(sizeof(UserSelectionOutput)); std::memcpy(final_data.data(), &output, final_data.size()); - broker.PushNormalDataFromApplet(IStorage{final_data}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data))); broker.SignalStateChanged(); } diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index 748559cd0..54e63c138 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -102,7 +102,8 @@ void SoftwareKeyboard::ExecuteInteractive() { void SoftwareKeyboard::Execute() { if (complete) { - broker.PushNormalDataFromApplet(IStorage{final_data}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data))); + broker.SignalStateChanged(); return; } @@ -119,7 +120,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) { std::vector<u8> output_sub(SWKBD_OUTPUT_BUFFER_SIZE); if (config.utf_8) { - const u64 size = text->size() + 8; + const u64 size = text->size() + sizeof(u64); const auto new_text = Common::UTF16ToUTF8(*text); std::memcpy(output_sub.data(), &size, sizeof(u64)); @@ -130,7 +131,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) { std::memcpy(output_main.data() + 4, new_text.data(), std::min(new_text.size(), SWKBD_OUTPUT_BUFFER_SIZE - 4)); } else { - const u64 size = text->size() * 2 + 8; + const u64 size = text->size() * 2 + sizeof(u64); std::memcpy(output_sub.data(), &size, sizeof(u64)); std::memcpy(output_sub.data() + 8, text->data(), std::min(text->size() * 2, SWKBD_OUTPUT_BUFFER_SIZE - 8)); @@ -144,15 +145,15 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) { final_data = output_main; if (complete) { - 
broker.PushNormalDataFromApplet(IStorage{output_main}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main))); broker.SignalStateChanged(); } else { - broker.PushInteractiveDataFromApplet(IStorage{output_sub}); + broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::move(output_sub))); } } else { output_main[0] = 1; complete = true; - broker.PushNormalDataFromApplet(IStorage{output_main}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main))); broker.SignalStateChanged(); } } diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index 5546ef6e8..12443c910 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp @@ -284,7 +284,7 @@ void WebBrowser::Finalize() { std::vector<u8> data(sizeof(WebCommonReturnValue)); std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue)); - broker.PushNormalDataFromApplet(IStorage{data}); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(data))); broker.SignalStateChanged(); if (!temporary_dir.empty() && FileUtil::IsDirectory(temporary_dir)) { diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index cb839e4a2..d19513cbb 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -170,8 +170,10 @@ public: {3, nullptr, "SetContextForMultiStream"}, {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, - {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, - {7, nullptr, "DecodeInterleavedForMultiStream"}, + {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"}, + {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"}, + {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, + {9, nullptr, "DecodeInterleavedForMultiStream"}, }; // clang-format on diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 6f5ea095a..def3410cc 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name, } bool NullBackend::Clear(u64 title_id) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}"); + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id); return true; } void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id, + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id, Common::HexToString(passphrase)); } diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 67e39a5c4..f589864ee 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -200,7 +200,8 @@ private: DownloadResult DownloadInternal(const std::string& resolved_path, u32 timeout_seconds, const std::string& content_type_name) { if (client == nullptr) { - client = std::make_unique<httplib::SSLClient>(BOXCAT_HOSTNAME, PORT, timeout_seconds); + client = std::make_unique<httplib::SSLClient>(BOXCAT_HOSTNAME, PORT); + client->set_timeout_sec(timeout_seconds); } httplib::Headers headers{ @@ -448,8 
+449,8 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title) Boxcat::StatusResult Boxcat::GetStatus(std::optional<std::string>& global, std::map<std::string, EventStatus>& games) { - httplib::SSLClient client{BOXCAT_HOSTNAME, static_cast<int>(PORT), - static_cast<int>(TIMEOUT_SECONDS)}; + httplib::SSLClient client{BOXCAT_HOSTNAME, static_cast<int>(PORT)}; + client.set_timeout_sec(static_cast<int>(TIMEOUT_SECONDS)); httplib::Headers headers{ {std::string("Game-Assets-API-Version"), std::string(BOXCAT_API_VERSION)}, diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 55d62fc5e..e6811d5b5 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -420,7 +420,7 @@ public: return; } - IFile file(result.Unwrap()); + auto file = std::make_shared<IFile>(result.Unwrap()); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); @@ -445,7 +445,7 @@ public: return; } - IDirectory directory(result.Unwrap()); + auto directory = std::make_shared<IDirectory>(result.Unwrap()); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); @@ -794,8 +794,8 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) { void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_FS, "called"); - IFileSystem filesystem(fsc.OpenSDMC().Unwrap(), - SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard)); + auto filesystem = std::make_shared<IFileSystem>( + fsc.OpenSDMC().Unwrap(), SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard)); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); @@ -846,7 +846,8 @@ void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) { id = FileSys::StorageId::NandSystem; } - IFileSystem filesystem(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id)); + auto filesystem = + std::make_shared<IFileSystem>(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id)); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); @@ -898,7 +899,7 @@ void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { return; } - IStorage storage(std::move(romfs.Unwrap())); + auto storage = std::make_shared<IStorage>(std::move(romfs.Unwrap())); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); @@ -937,7 +938,8 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) { FileSys::PatchManager pm{title_id}; - IStorage storage(pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data)); + auto storage = std::make_shared<IStorage>( + pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data)); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 4d952adc0..15c09f04c 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -250,6 +250,10 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { auto& rstick_entry = npad_pad_states[controller_idx].r_stick; const auto& button_state = buttons[controller_idx]; const auto& analog_state = sticks[controller_idx]; + const auto [stick_l_x_f, stick_l_y_f] = + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus(); + const auto [stick_r_x_f, stick_r_y_f] = + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus(); using namespace 
Settings::NativeButton; pad_state.a.Assign(button_state[A - BUTTON_HID_BEGIN]->GetStatus()); @@ -270,23 +274,32 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { pad_state.d_right.Assign(button_state[DRight - BUTTON_HID_BEGIN]->GetStatus()); pad_state.d_down.Assign(button_state[DDown - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_left.Assign(button_state[LStick_Left - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_up.Assign(button_state[LStick_Up - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_right.Assign(button_state[LStick_Right - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_down.Assign(button_state[LStick_Down - BUTTON_HID_BEGIN]->GetStatus()); - - pad_state.r_stick_left.Assign(button_state[RStick_Left - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_up.Assign(button_state[RStick_Up - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_right.Assign(button_state[RStick_Right - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_down.Assign(button_state[RStick_Down - BUTTON_HID_BEGIN]->GetStatus()); + pad_state.l_stick_right.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::RIGHT)); + pad_state.l_stick_left.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::LEFT)); + pad_state.l_stick_up.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::UP)); + pad_state.l_stick_down.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::DOWN)); + + pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); + pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT)); + pad_state.r_stick_right.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); + pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus()); pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus()); - const auto [stick_l_x_f, stick_l_y_f] = - analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus(); - const auto [stick_r_x_f, stick_r_y_f] = - analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus(); lstick_entry.x = static_cast<s32>(stick_l_x_f * HID_JOYSTICK_MAX); lstick_entry.y = static_cast<s32>(stick_l_y_f * HID_JOYSTICK_MAX); rstick_entry.x = static_cast<s32>(stick_r_x_f * HID_JOYSTICK_MAX); diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 89bf8b815..e6b56a9f9 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -10,6 +10,7 @@ #include "core/core_timing_util.h" #include "core/frontend/emu_window.h" #include "core/frontend/input.h" +#include "core/hardware_properties.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" @@ -37,11 +38,11 @@ namespace Service::HID { // Updating period for each HID device. 
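RequestPadStateUpdate above now derives the digital stick-direction bits from the analog devices via GetAnalogDirectionStatus instead of from separate button bindings; the SDL implementation added later in this patch uses a 0.4 dead zone. A minimal sketch of that thresholding, assuming a stick position normalized to [-1, 1]:

    enum class AnalogDirection { RIGHT, LEFT, UP, DOWN };

    // Converts an analog position into a digital direction flag using a fixed dead zone,
    // matching the 0.4 threshold of the SDL backend further down in this diff.
    constexpr bool GetAnalogDirectionStatus(float x, float y, AnalogDirection direction) {
        constexpr float directional_deadzone = 0.4f;
        switch (direction) {
        case AnalogDirection::RIGHT:
            return x > directional_deadzone;
        case AnalogDirection::LEFT:
            return x < -directional_deadzone;
        case AnalogDirection::UP:
            return y > directional_deadzone;
        case AnalogDirection::DOWN:
            return y < -directional_deadzone;
        }
        return false;
    }

    static_assert(GetAnalogDirectionStatus(0.9f, 0.0f, AnalogDirection::RIGHT));
    static_assert(!GetAnalogDirectionStatus(0.2f, 0.0f, AnalogDirection::RIGHT));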
// TODO(ogniK): Find actual polling rate of hid -constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66); +constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66); [[maybe_unused]] constexpr s64 accelerometer_update_ticks = - static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100); + static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); [[maybe_unused]] constexpr s64 gyroscope_update_ticks = - static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100); + static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; IAppletResource::IAppletResource(Core::System& system) diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index ed5059047..92adde6d4 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -129,12 +129,20 @@ public: {304, nullptr, "Disconnect"}, {400, nullptr, "Initialize"}, {401, nullptr, "Finalize"}, - {402, nullptr, "SetOperationMode"}, + {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+ }; // clang-format on RegisterHandlers(functions); } + + void Initialize2(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_LDN, "(STUBBED) called"); + // Result success seem make this services start network and continue. + // If we just pass result error then it will stop and maybe try again and again. + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_UNKNOWN); + } }; class LDNS final : public ServiceFramework<LDNS> { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 6d8bca8bb..f1966ac0e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve return GetWaitbase(input, output); case IoctlCommand::IocChannelSetTimeoutCommand: return ChannelSetTimeout(input, output); + case IoctlCommand::IocChannelSetTimeslice: + return ChannelSetTimeslice(input, output); default: break; } @@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& return 0; } +u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetTimeslice params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSetTimeslice)); + LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); + + channel_timeslice = params.timeslice; + + return 0; +} + } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index d056dd046..2ac74743f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -48,6 +48,7 @@ private: IocAllocObjCtxCommand = 0xC0104809, IocChannelGetWaitbaseCommand = 0xC0080003, IocChannelSetTimeoutCommand = 0x40044803, + IocChannelSetTimeslice = 0xC004481D, }; enum class CtxObjects : u32_le { @@ -101,6 +102,11 @@ private: static_assert(sizeof(IoctlChannelSetPriority) == 4, "IoctlChannelSetPriority is incorrect size"); + struct IoctlSetTimeslice { + u32_le timeslice; + }; + static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size"); + struct IoctlEventIdControl { u32_le cmd; // 0=disable, 1=enable, 2=clear u32_le id; @@ -174,6 +180,7 @@ private: u64_le user_data{}; IoctlZCullBind zcull_params{}; u32_le channel_priority{}; + u32_le 
channel_timeslice{}; u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output); @@ -188,6 +195,7 @@ private: const std::vector<u8>& input2, IoctlVersion version); u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); + u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); std::shared_ptr<nvmap> nvmap_dev; u32 assigned_syncpoints{}; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 62752e419..134152210 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" @@ -26,8 +27,8 @@ namespace Service::NVFlinger { -constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60); -constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30); +constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); +constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30); NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(0, "Default", system); @@ -222,7 +223,7 @@ void NVFlinger::Compose() { s64 NVFlinger::GetNextTicks() const { constexpr s64 max_hertz = 120LL; - return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; + return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index 5eb26caf8..8f1be0e48 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -50,16 +50,16 @@ private: IPC::RequestParser rp{ctx}; const auto process_id = rp.PopRaw<u64>(); - const auto data1 = ctx.ReadBuffer(0); - const auto data2 = ctx.ReadBuffer(1); + std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)}; + if (Type == Core::Reporter::PlayReportType::New) { + data.emplace_back(ctx.ReadBuffer(1)); + } - LOG_DEBUG(Service_PREPO, - "called, type={:02X}, process_id={:016X}, data1_size={:016X}, data2_size={:016X}", - static_cast<u8>(Type), process_id, data1.size(), data2.size()); + LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}", + static_cast<u8>(Type), process_id, data[0].size()); const auto& reporter{system.GetReporter()}; - reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2}, - process_id); + reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -70,19 +70,19 @@ private: IPC::RequestParser rp{ctx}; const auto user_id = rp.PopRaw<u128>(); const auto process_id = rp.PopRaw<u64>(); - - const auto data1 = ctx.ReadBuffer(0); - const auto data2 = ctx.ReadBuffer(1); + std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)}; + if (Type == Core::Reporter::PlayReportType::New) { + data.emplace_back(ctx.ReadBuffer(1)); + } LOG_DEBUG( Service_PREPO, - "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}, " - "data2_size={:016X}", 
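The new IocChannelSetTimeslice value introduced above (0xC004481D) appears to follow the standard Linux _IOC ioctl encoding: an 8-bit command number, an 8-bit type (0x48, ASCII 'H', here), a 14-bit payload size and two direction bits. A small decoding sketch, assuming that layout:

    #include <cstdint>

    // asm-generic _IOC layout: nr[7:0], type[15:8], size[29:16], dir[31:30].
    constexpr std::uint32_t IocNr(std::uint32_t cmd)   { return cmd & 0xFF; }
    constexpr std::uint32_t IocType(std::uint32_t cmd) { return (cmd >> 8) & 0xFF; }
    constexpr std::uint32_t IocSize(std::uint32_t cmd) { return (cmd >> 16) & 0x3FFF; }
    constexpr std::uint32_t IocDir(std::uint32_t cmd)  { return cmd >> 30; }

    constexpr std::uint32_t IocChannelSetTimeslice = 0xC004481D;

    static_assert(IocDir(IocChannelSetTimeslice) == 3);      // _IOC_READ | _IOC_WRITE
    static_assert(IocType(IocChannelSetTimeslice) == 0x48);  // 'H'
    static_assert(IocNr(IocChannelSetTimeslice) == 0x1D);
    static_assert(IocSize(IocChannelSetTimeslice) == 4);     // sizeof(IoctlSetTimeslice)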
- static_cast<u8>(Type), user_id[1], user_id[0], process_id, data1.size(), data2.size()); + "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}", + static_cast<u8>(Type), user_id[1], user_id[0], process_id, data[0].size()); const auto& reporter{system.GetReporter()}; - reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2}, - process_id, user_id); + reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id, + user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 884ad173b..f67fab2f9 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -42,6 +42,26 @@ void BSD::Socket(Kernel::HLERequestContext& ctx) { rb.Push<u32>(0); // bsd errno } +void BSD::Select(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + +void BSD::Bind(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + void BSD::Connect(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service, "(STUBBED) called"); @@ -52,6 +72,26 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) { rb.Push<u32>(0); // bsd errno } +void BSD::Listen(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + +void BSD::SetSockOpt(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + void BSD::SendTo(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service, "(STUBBED) called"); @@ -80,7 +120,7 @@ BSD::BSD(const char* name) : ServiceFramework(name) { {2, &BSD::Socket, "Socket"}, {3, nullptr, "SocketExempt"}, {4, nullptr, "Open"}, - {5, nullptr, "Select"}, + {5, &BSD::Select, "Select"}, {6, nullptr, "Poll"}, {7, nullptr, "Sysctl"}, {8, nullptr, "Recv"}, @@ -88,15 +128,15 @@ BSD::BSD(const char* name) : ServiceFramework(name) { {10, nullptr, "Send"}, {11, &BSD::SendTo, "SendTo"}, {12, nullptr, "Accept"}, - {13, nullptr, "Bind"}, + {13, &BSD::Bind, "Bind"}, {14, &BSD::Connect, "Connect"}, {15, nullptr, "GetPeerName"}, {16, nullptr, "GetSockName"}, {17, nullptr, "GetSockOpt"}, - {18, nullptr, "Listen"}, + {18, &BSD::Listen, "Listen"}, {19, nullptr, "Ioctl"}, {20, nullptr, "Fcntl"}, - {21, nullptr, "SetSockOpt"}, + {21, &BSD::SetSockOpt, "SetSockOpt"}, {22, nullptr, "Shutdown"}, {23, nullptr, "ShutdownAllSockets"}, {24, nullptr, "Write"}, diff --git a/src/core/hle/service/sockets/bsd.h b/src/core/hle/service/sockets/bsd.h index 0fe0e65c6..3098e3baf 100644 --- a/src/core/hle/service/sockets/bsd.h +++ b/src/core/hle/service/sockets/bsd.h @@ -18,7 +18,11 @@ private: void RegisterClient(Kernel::HLERequestContext& ctx); void StartMonitoring(Kernel::HLERequestContext& ctx); void Socket(Kernel::HLERequestContext& ctx); + void Select(Kernel::HLERequestContext& ctx); + void Bind(Kernel::HLERequestContext& ctx); void Connect(Kernel::HLERequestContext& ctx); + void Listen(Kernel::HLERequestContext& ctx); + void 
SetSockOpt(Kernel::HLERequestContext& ctx); void SendTo(Kernel::HLERequestContext& ctx); void Close(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index ca1a783fc..1575f0b49 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/service/time/standard_steady_clock_core.h" namespace Service::Time::Clock { @@ -12,7 +13,7 @@ namespace Service::Time::Clock { TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index c77b98189..44d5bc651 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/service/time/tick_based_steady_clock_core.h" namespace Service::Time::Clock { @@ -12,7 +13,7 @@ namespace Service::Time::Clock { SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; return {ticks_time_span.ToSeconds(), GetClockSourceId()}; } diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 8ef4efcef..749b7be70 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -6,6 +6,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" @@ -233,7 +234,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { const auto ticks{Clock::TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; const s64 base_time_point{context.offset + current_time_point.time_point - ticks.ToSeconds()}; IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index 9b03191bf..fdaef233f 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/service/time/clock_types.h" #include "core/hle/service/time/steady_clock_core.h" #include 
"core/hle/service/time/time_sharedmemory.h" @@ -31,7 +32,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system, Clock::TimeSpanType current_time_point) { const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; const Clock::SteadyClockContext context{ static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), clock_source_id}; diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 515c5accb..044067a5b 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -97,7 +97,8 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, if (nso_header.IsSegmentCompressed(i)) { data = DecompressSegment(data, nso_header.segments[i]); } - program_image.resize(nso_header.segments[i].location + data.size()); + program_image.resize(nso_header.segments[i].location + + PageAlignSize(static_cast<u32>(data.size()))); std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), data.size()); codeset.segments[i].addr = nso_header.segments[i].location; @@ -105,8 +106,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size())); } - if (should_pass_arguments && !Settings::values.program_args.empty()) { - const auto arg_data = Settings::values.program_args; + if (should_pass_arguments) { + std::vector<u8> arg_data{Settings::values.program_args.begin(), + Settings::values.program_args.end()}; + if (arg_data.empty()) { + arg_data.resize(NSO_ARGUMENT_DEFAULT_SIZE); + } codeset.DataSegment().size += NSO_ARGUMENT_DATA_ALLOCATION_SIZE; NSOArgumentHeader args_header{ NSO_ARGUMENT_DATA_ALLOCATION_SIZE, static_cast<u32_le>(arg_data.size()), {}}; diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index 58cbe162d..d2d600cd9 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -56,6 +56,8 @@ static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size."); static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable."); constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000; +// NOTE: Official software default argument state is unverified. 
+constexpr u64 NSO_ARGUMENT_DEFAULT_SIZE = 1; struct NSOArgumentHeader { u32_le allocated_size; diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index d1e6bed93..4472500d2 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -9,6 +9,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/process.h" #include "core/hle/service/hid/controllers/npad.h" #include "core/hle/service/hid/hid.h" @@ -17,7 +18,7 @@ namespace Memory { -constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 12); +constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12); constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) diff --git a/src/core/settings.h b/src/core/settings.h index 9c98a9287..f837d3fbc 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -371,6 +371,11 @@ enum class SDMCSize : u64 { S1TB = 0x10000000000ULL, }; +enum class RendererBackend { + OpenGL = 0, + Vulkan = 1, +}; + struct Values { // System bool use_docked_mode; @@ -401,6 +406,9 @@ struct Values { std::string motion_device; TouchscreenInput touchscreen; std::atomic_bool is_device_reload_pending{true}; + std::string udp_input_address; + u16 udp_input_port; + u8 udp_pad_index; // Core bool use_multi_core; @@ -416,7 +424,12 @@ struct Values { SDMCSize sdmc_size; // Renderer + RendererBackend renderer_backend; + bool renderer_debug; + int vulkan_device; + float resolution_factor; + int aspect_ratio; bool use_frame_limit; u16 frame_limit; bool use_disk_shader_cache; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 320e8ad73..0e72d31cd 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -46,6 +46,16 @@ static u64 GenerateTelemetryId() { return telemetry_id; } +static const char* TranslateRenderer(Settings::RendererBackend backend) { + switch (backend) { + case Settings::RendererBackend::OpenGL: + return "OpenGL"; + case Settings::RendererBackend::Vulkan: + return "Vulkan"; + } + return "Unknown"; +} + u64 GetTelemetryId() { u64 telemetry_id{}; const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + @@ -169,7 +179,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { AddField(field_type, "Audio_SinkId", Settings::values.sink_id); AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core); - AddField(field_type, "Renderer_Backend", "OpenGL"); + AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend)); AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor); AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index 55e0dbc49..1e060f009 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -7,13 +7,14 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/memory.h" #include "core/tools/freezer.h" namespace Tools { namespace { -constexpr s64 MEMORY_FREEZER_TICKS = 
static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60); +constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) { switch (width) { diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index 5b4e032bd..2520ba321 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -9,6 +9,12 @@ add_library(input_common STATIC motion_emu.h sdl/sdl.cpp sdl/sdl.h + udp/client.cpp + udp/client.h + udp/protocol.cpp + udp/protocol.h + udp/udp.cpp + udp/udp.h ) if(SDL2_FOUND) @@ -21,4 +27,4 @@ if(SDL2_FOUND) endif() create_target_directory_groups(input_common) -target_link_libraries(input_common PUBLIC core PRIVATE common) +target_link_libraries(input_common PUBLIC core PRIVATE common ${Boost_LIBRARIES}) diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 8e66c1b15..c98c848cf 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -9,6 +9,7 @@ #include "input_common/keyboard.h" #include "input_common/main.h" #include "input_common/motion_emu.h" +#include "input_common/udp/udp.h" #ifdef HAVE_SDL2 #include "input_common/sdl/sdl.h" #endif @@ -18,6 +19,7 @@ namespace InputCommon { static std::shared_ptr<Keyboard> keyboard; static std::shared_ptr<MotionEmu> motion_emu; static std::unique_ptr<SDL::State> sdl; +static std::unique_ptr<CemuhookUDP::State> udp; void Init() { keyboard = std::make_shared<Keyboard>(); @@ -28,6 +30,8 @@ void Init() { Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu); sdl = SDL::Init(); + + udp = CemuhookUDP::Init(); } void Shutdown() { @@ -37,6 +41,7 @@ void Shutdown() { Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); motion_emu.reset(); sdl.reset(); + udp.reset(); } Keyboard* GetKeyboard() { @@ -72,11 +77,13 @@ std::string GenerateAnalogParamFromKeys(int key_up, int key_down, int key_left, namespace Polling { std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) { + std::vector<std::unique_ptr<DevicePoller>> pollers; + #ifdef HAVE_SDL2 - return sdl->GetPollers(type); -#else - return {}; + pollers = sdl->GetPollers(type); #endif + + return pollers; } } // namespace Polling diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index d2e9d278f..a2e0c0bd2 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -342,6 +342,22 @@ public: return std::make_tuple<float, float>(0.0f, 0.0f); } + bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { + const auto [x, y] = GetStatus(); + const float directional_deadzone = 0.4f; + switch (direction) { + case Input::AnalogDirection::RIGHT: + return x > directional_deadzone; + case Input::AnalogDirection::LEFT: + return x < -directional_deadzone; + case Input::AnalogDirection::UP: + return y > directional_deadzone; + case Input::AnalogDirection::DOWN: + return y < -directional_deadzone; + } + return false; + } + private: std::shared_ptr<SDLJoystick> joystick; const int axis_x; diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp new file mode 100644 index 000000000..2228571a6 --- /dev/null +++ b/src/input_common/udp/client.cpp @@ -0,0 +1,286 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
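input_common now builds and registers the Cemuhook UDP backend, and settings.h (earlier in this diff) gains udp_input_address, udp_input_port and udp_pad_index for it. Purely as a hypothetical illustration (this wiring is not part of the patch, and DeviceStatus is assumed to live in the same namespace as Client), a frontend could construct the Client defined further below from those settings:

    #include <memory>
    #include <utility>

    #include "core/settings.h"
    #include "input_common/udp/client.h"

    // Hypothetical helper, not in this patch: ties the new Settings fields to the
    // CemuhookUDP::Client constructor that appears further down in this diff.
    std::unique_ptr<InputCommon::CemuhookUDP::Client> MakeUdpClient(
        std::shared_ptr<InputCommon::CemuhookUDP::DeviceStatus> status) {
        return std::make_unique<InputCommon::CemuhookUDP::Client>(
            std::move(status), Settings::values.udp_input_address,
            Settings::values.udp_input_port, Settings::values.udp_pad_index,
            /*client_id=*/0); // placeholder id
    }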
+ +#include <algorithm> +#include <array> +#include <chrono> +#include <cstring> +#include <functional> +#include <thread> +#include <boost/asio.hpp> +#include <boost/bind.hpp> +#include "common/logging/log.h" +#include "input_common/udp/client.h" +#include "input_common/udp/protocol.h" + +using boost::asio::ip::udp; + +namespace InputCommon::CemuhookUDP { + +struct SocketCallback { + std::function<void(Response::Version)> version; + std::function<void(Response::PortInfo)> port_info; + std::function<void(Response::PadData)> pad_data; +}; + +class Socket { +public: + using clock = std::chrono::system_clock; + + explicit Socket(const std::string& host, u16 port, u8 pad_index, u32 client_id, + SocketCallback callback) + : callback(std::move(callback)), timer(io_service), + socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), + pad_index(pad_index), + send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} + + void Stop() { + io_service.stop(); + } + + void Loop() { + io_service.run(); + } + + void StartSend(const clock::time_point& from) { + timer.expires_at(from + std::chrono::seconds(3)); + timer.async_wait([this](const boost::system::error_code& error) { HandleSend(error); }); + } + + void StartReceive() { + socket.async_receive_from( + boost::asio::buffer(receive_buffer), receive_endpoint, + [this](const boost::system::error_code& error, std::size_t bytes_transferred) { + HandleReceive(error, bytes_transferred); + }); + } + +private: + void HandleReceive(const boost::system::error_code& error, std::size_t bytes_transferred) { + if (auto type = Response::Validate(receive_buffer.data(), bytes_transferred)) { + switch (*type) { + case Type::Version: { + Response::Version version; + std::memcpy(&version, &receive_buffer[sizeof(Header)], sizeof(Response::Version)); + callback.version(std::move(version)); + break; + } + case Type::PortInfo: { + Response::PortInfo port_info; + std::memcpy(&port_info, &receive_buffer[sizeof(Header)], + sizeof(Response::PortInfo)); + callback.port_info(std::move(port_info)); + break; + } + case Type::PadData: { + Response::PadData pad_data; + std::memcpy(&pad_data, &receive_buffer[sizeof(Header)], sizeof(Response::PadData)); + callback.pad_data(std::move(pad_data)); + break; + } + } + } + StartReceive(); + } + + void HandleSend(const boost::system::error_code& error) { + // Send a request for getting port info for the pad + Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; + const auto port_message = Request::Create(port_info, client_id); + std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); + socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint); + + // Send a request for getting pad data for the pad + Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; + const auto pad_message = Request::Create(pad_data, client_id); + std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); + socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint); + StartSend(timer.expiry()); + } + + SocketCallback callback; + boost::asio::io_service io_service; + boost::asio::basic_waitable_timer<clock> timer; + udp::socket socket; + + u32 client_id{}; + u8 pad_index{}; + + static constexpr std::size_t PORT_INFO_SIZE = sizeof(Message<Request::PortInfo>); + static constexpr std::size_t PAD_DATA_SIZE = sizeof(Message<Request::PadData>); + std::array<u8, PORT_INFO_SIZE> send_buffer1; + std::array<u8, PAD_DATA_SIZE> send_buffer2; + udp::endpoint send_endpoint; + + std::array<u8, 
MAX_PACKET_SIZE> receive_buffer; + udp::endpoint receive_endpoint; +}; + +static void SocketLoop(Socket* socket) { + socket->StartReceive(); + socket->StartSend(Socket::clock::now()); + socket->Loop(); +} + +Client::Client(std::shared_ptr<DeviceStatus> status, const std::string& host, u16 port, + u8 pad_index, u32 client_id) + : status(std::move(status)) { + StartCommunication(host, port, pad_index, client_id); +} + +Client::~Client() { + socket->Stop(); + thread.join(); +} + +void Client::ReloadSocket(const std::string& host, u16 port, u8 pad_index, u32 client_id) { + socket->Stop(); + thread.join(); + StartCommunication(host, port, pad_index, client_id); +} + +void Client::OnVersion(Response::Version data) { + LOG_TRACE(Input, "Version packet received: {}", data.version); +} + +void Client::OnPortInfo(Response::PortInfo data) { + LOG_TRACE(Input, "PortInfo packet received: {}", data.model); +} + +void Client::OnPadData(Response::PadData data) { + LOG_TRACE(Input, "PadData packet received"); + if (data.packet_counter <= packet_sequence) { + LOG_WARNING( + Input, + "PadData packet dropped because its stale info. Current count: {} Packet count: {}", + packet_sequence, data.packet_counter); + return; + } + packet_sequence = data.packet_counter; + // TODO: Check how the Switch handles motions and how the CemuhookUDP motion + // directions correspond to the ones of the Switch + Common::Vec3f accel = Common::MakeVec<float>(data.accel.x, data.accel.y, data.accel.z); + Common::Vec3f gyro = Common::MakeVec<float>(data.gyro.pitch, data.gyro.yaw, data.gyro.roll); + { + std::lock_guard guard(status->update_mutex); + + status->motion_status = {accel, gyro}; + + // TODO: add a setting for "click" touch. Click touch refers to a device that differentiates + // between a simple "tap" and a hard press that causes the touch screen to click. 
+ const bool is_active = data.touch_1.is_active != 0; + + float x = 0; + float y = 0; + + if (is_active && status->touch_calibration) { + const u16 min_x = status->touch_calibration->min_x; + const u16 max_x = status->touch_calibration->max_x; + const u16 min_y = status->touch_calibration->min_y; + const u16 max_y = status->touch_calibration->max_y; + + x = (std::clamp(static_cast<u16>(data.touch_1.x), min_x, max_x) - min_x) / + static_cast<float>(max_x - min_x); + y = (std::clamp(static_cast<u16>(data.touch_1.y), min_y, max_y) - min_y) / + static_cast<float>(max_y - min_y); + } + + status->touch_status = {x, y, is_active}; + } +} + +void Client::StartCommunication(const std::string& host, u16 port, u8 pad_index, u32 client_id) { + SocketCallback callback{[this](Response::Version version) { OnVersion(version); }, + [this](Response::PortInfo info) { OnPortInfo(info); }, + [this](Response::PadData data) { OnPadData(data); }}; + LOG_INFO(Input, "Starting communication with UDP input server on {}:{}", host, port); + socket = std::make_unique<Socket>(host, port, pad_index, client_id, callback); + thread = std::thread{SocketLoop, this->socket.get()}; +} + +void TestCommunication(const std::string& host, u16 port, u8 pad_index, u32 client_id, + std::function<void()> success_callback, + std::function<void()> failure_callback) { + std::thread([=] { + Common::Event success_event; + SocketCallback callback{[](Response::Version version) {}, [](Response::PortInfo info) {}, + [&](Response::PadData data) { success_event.Set(); }}; + Socket socket{host, port, pad_index, client_id, std::move(callback)}; + std::thread worker_thread{SocketLoop, &socket}; + bool result = success_event.WaitFor(std::chrono::seconds(8)); + socket.Stop(); + worker_thread.join(); + if (result) { + success_callback(); + } else { + failure_callback(); + } + }) + .detach(); +} + +CalibrationConfigurationJob::CalibrationConfigurationJob( + const std::string& host, u16 port, u8 pad_index, u32 client_id, + std::function<void(Status)> status_callback, + std::function<void(u16, u16, u16, u16)> data_callback) { + + std::thread([=] { + constexpr u16 CALIBRATION_THRESHOLD = 100; + + u16 min_x{UINT16_MAX}; + u16 min_y{UINT16_MAX}; + u16 max_x{}; + u16 max_y{}; + + Status current_status{Status::Initialized}; + SocketCallback callback{[](Response::Version version) {}, [](Response::PortInfo info) {}, + [&](Response::PadData data) { + if (current_status == Status::Initialized) { + // Receiving data means the communication is ready now + current_status = Status::Ready; + status_callback(current_status); + } + if (!data.touch_1.is_active) { + return; + } + LOG_DEBUG(Input, "Current touch: {} {}", data.touch_1.x, + data.touch_1.y); + min_x = std::min(min_x, static_cast<u16>(data.touch_1.x)); + min_y = std::min(min_y, static_cast<u16>(data.touch_1.y)); + if (current_status == Status::Ready) { + // First touch - min data (min_x/min_y) + current_status = Status::Stage1Completed; + status_callback(current_status); + } + if (data.touch_1.x - min_x > CALIBRATION_THRESHOLD && + data.touch_1.y - min_y > CALIBRATION_THRESHOLD) { + // Set the current position as max value and finishes + // configuration + max_x = data.touch_1.x; + max_y = data.touch_1.y; + current_status = Status::Completed; + data_callback(min_x, min_y, max_x, max_y); + status_callback(current_status); + + complete_event.Set(); + } + }}; + Socket socket{host, port, pad_index, client_id, std::move(callback)}; + std::thread worker_thread{SocketLoop, &socket}; + complete_event.Wait(); + 
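// Worked example of the calibration callback above (the raw values are illustrative):
// the first active touch, say at (120, 80), seeds min_x/min_y and moves the job to
// Stage1Completed; once a later touch exceeds both minima by more than
// CALIBRATION_THRESHOLD (100), for example (1700, 900), that point becomes max_x/max_y,
// data_callback(120, 80, 1700, 900) fires and the job reports Completed.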
socket.Stop(); + worker_thread.join(); + }) + .detach(); +} + +CalibrationConfigurationJob::~CalibrationConfigurationJob() { + Stop(); +} + +void CalibrationConfigurationJob::Stop() { + complete_event.Set(); +} + +} // namespace InputCommon::CemuhookUDP diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h new file mode 100644 index 000000000..b8c654755 --- /dev/null +++ b/src/input_common/udp/client.h @@ -0,0 +1,95 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <memory> +#include <mutex> +#include <optional> +#include <string> +#include <thread> +#include <tuple> +#include "common/common_types.h" +#include "common/thread.h" +#include "common/vector_math.h" + +namespace InputCommon::CemuhookUDP { + +constexpr u16 DEFAULT_PORT = 26760; +constexpr char DEFAULT_ADDR[] = "127.0.0.1"; + +class Socket; + +namespace Response { +struct PadData; +struct PortInfo; +struct Version; +} // namespace Response + +struct DeviceStatus { + std::mutex update_mutex; + std::tuple<Common::Vec3<float>, Common::Vec3<float>> motion_status; + std::tuple<float, float, bool> touch_status; + + // calibration data for scaling the device's touch area to 3ds + struct CalibrationData { + u16 min_x{}; + u16 min_y{}; + u16 max_x{}; + u16 max_y{}; + }; + std::optional<CalibrationData> touch_calibration; +}; + +class Client { +public: + explicit Client(std::shared_ptr<DeviceStatus> status, const std::string& host = DEFAULT_ADDR, + u16 port = DEFAULT_PORT, u8 pad_index = 0, u32 client_id = 24872); + ~Client(); + void ReloadSocket(const std::string& host = "127.0.0.1", u16 port = 26760, u8 pad_index = 0, + u32 client_id = 24872); + +private: + void OnVersion(Response::Version); + void OnPortInfo(Response::PortInfo); + void OnPadData(Response::PadData); + void StartCommunication(const std::string& host, u16 port, u8 pad_index, u32 client_id); + + std::unique_ptr<Socket> socket; + std::shared_ptr<DeviceStatus> status; + std::thread thread; + u64 packet_sequence = 0; +}; + +/// An async job allowing configuration of the touchpad calibration. +class CalibrationConfigurationJob { +public: + enum class Status { + Initialized, + Ready, + Stage1Completed, + Completed, + }; + /** + * Constructs and starts the job with the specified parameter. + * + * @param status_callback Callback for job status updates + * @param data_callback Called when calibration data is ready + */ + explicit CalibrationConfigurationJob(const std::string& host, u16 port, u8 pad_index, + u32 client_id, std::function<void(Status)> status_callback, + std::function<void(u16, u16, u16, u16)> data_callback); + ~CalibrationConfigurationJob(); + void Stop(); + +private: + Common::Event complete_event; +}; + +void TestCommunication(const std::string& host, u16 port, u8 pad_index, u32 client_id, + std::function<void()> success_callback, + std::function<void()> failure_callback); + +} // namespace InputCommon::CemuhookUDP diff --git a/src/input_common/udp/protocol.cpp b/src/input_common/udp/protocol.cpp new file mode 100644 index 000000000..a982ac49d --- /dev/null +++ b/src/input_common/udp/protocol.cpp @@ -0,0 +1,79 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
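// A minimal sketch of how a frontend might consume the Client/DeviceStatus API declared
// in client.h above; the one-second wait and the ReadTouchOnce wrapper are illustrative
// assumptions, not something defined by the code above.
#include <chrono>
#include <memory>
#include <thread>
#include "input_common/udp/client.h"

void ReadTouchOnce() {
    using namespace InputCommon::CemuhookUDP;
    auto status = std::make_shared<DeviceStatus>();
    // Uses the defaults above: a cemuhook-compatible server on 127.0.0.1:26760, pad 0.
    Client client{status};
    std::this_thread::sleep_for(std::chrono::seconds(1));
    {
        // The client's worker thread writes into the shared DeviceStatus, so lock update_mutex.
        std::lock_guard guard{status->update_mutex};
        const auto [x, y, pressed] = status->touch_status;
        // x and y are normalized to [0, 1] only when touch_calibration has been filled in.
    }
} // ~Client() stops the io_service and joins the worker thread.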
+ +#include <cstddef> +#include <cstring> +#include "common/logging/log.h" +#include "input_common/udp/protocol.h" + +namespace InputCommon::CemuhookUDP { + +static constexpr std::size_t GetSizeOfResponseType(Type t) { + switch (t) { + case Type::Version: + return sizeof(Response::Version); + case Type::PortInfo: + return sizeof(Response::PortInfo); + case Type::PadData: + return sizeof(Response::PadData); + } + return 0; +} + +namespace Response { + +/** + * Returns Type if the packet is valid, else none + * + * Note: Modifies the buffer to zero out the crc (since thats the easiest way to check without + * copying the buffer) + */ +std::optional<Type> Validate(u8* data, std::size_t size) { + if (size < sizeof(Header)) { + LOG_DEBUG(Input, "Invalid UDP packet received"); + return std::nullopt; + } + Header header{}; + std::memcpy(&header, data, sizeof(Header)); + if (header.magic != SERVER_MAGIC) { + LOG_ERROR(Input, "UDP Packet has an unexpected magic value"); + return std::nullopt; + } + if (header.protocol_version != PROTOCOL_VERSION) { + LOG_ERROR(Input, "UDP Packet protocol mismatch"); + return std::nullopt; + } + if (header.type < Type::Version || header.type > Type::PadData) { + LOG_ERROR(Input, "UDP Packet is an unknown type"); + return std::nullopt; + } + + // Packet size must equal sizeof(Header) + sizeof(Data) + // and also verify that the packet info mentions the correct size. Since the spec includes the + // type of the packet as part of the data, we need to include it in size calculations here + // ie: payload_length == sizeof(T) + sizeof(Type) + const std::size_t data_len = GetSizeOfResponseType(header.type); + if (header.payload_length != data_len + sizeof(Type) || size < data_len + sizeof(Header)) { + LOG_ERROR( + Input, + "UDP Packet payload length doesn't match. Received: {} PayloadLength: {} Expected: {}", + size, header.payload_length, data_len + sizeof(Type)); + return std::nullopt; + } + + const u32 crc32 = header.crc; + boost::crc_32_type result; + // zero out the crc in the buffer and then run the crc against it + std::memset(&data[offsetof(Header, crc)], 0, sizeof(u32_le)); + + result.process_bytes(data, data_len + sizeof(Header)); + if (crc32 != result.checksum()) { + LOG_ERROR(Input, "UDP Packet CRC check failed. Offset: {}", offsetof(Header, crc)); + return std::nullopt; + } + return header.type; +} +} // namespace Response + +} // namespace InputCommon::CemuhookUDP diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h new file mode 100644 index 000000000..3ba4d1fc8 --- /dev/null +++ b/src/input_common/udp/protocol.h @@ -0,0 +1,255 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
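// A worked note on the CRC handling in Response::Validate() above: the crc field sits at
// byte offset 8 in Header (after the 4-byte magic and the two u16 fields), so the receiver
// zeroes bytes 8..11 of the packet and re-runs boost::crc_32_type over the first
// sizeof(Header) + data_len bytes; Request::Create() below builds its checksum the same
// way, with header.crc still zero when process_bytes() runs.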
+ +#pragma once + +#include <array> +#include <optional> +#include <type_traits> +#include <boost/crc.hpp> +#include "common/bit_field.h" +#include "common/swap.h" + +namespace InputCommon::CemuhookUDP { + +constexpr std::size_t MAX_PACKET_SIZE = 100; +constexpr u16 PROTOCOL_VERSION = 1001; +constexpr u32 CLIENT_MAGIC = 0x43555344; // DSUC (but flipped for LE) +constexpr u32 SERVER_MAGIC = 0x53555344; // DSUS (but flipped for LE) + +enum class Type : u32 { + Version = 0x00100000, + PortInfo = 0x00100001, + PadData = 0x00100002, +}; + +struct Header { + u32_le magic{}; + u16_le protocol_version{}; + u16_le payload_length{}; + u32_le crc{}; + u32_le id{}; + ///> In the protocol, the type of the packet is not part of the header, but its convenient to + ///> include in the header so the callee doesn't have to duplicate the type twice when building + ///> the data + Type type{}; +}; +static_assert(sizeof(Header) == 20, "UDP Message Header struct has wrong size"); +static_assert(std::is_trivially_copyable_v<Header>, "UDP Message Header is not trivially copyable"); + +using MacAddress = std::array<u8, 6>; +constexpr MacAddress EMPTY_MAC_ADDRESS = {0, 0, 0, 0, 0, 0}; + +#pragma pack(push, 1) +template <typename T> +struct Message { + Header header{}; + T data; +}; +#pragma pack(pop) + +template <typename T> +constexpr Type GetMessageType(); + +namespace Request { + +struct Version {}; +/** + * Requests the server to send information about what controllers are plugged into the ports + * In citra's case, we only have one controller, so for simplicity's sake, we can just send a + * request explicitly for the first controller port and leave it at that. In the future it would be + * nice to make this configurable + */ +constexpr u32 MAX_PORTS = 4; +struct PortInfo { + u32_le pad_count{}; ///> Number of ports to request data for + std::array<u8, MAX_PORTS> port; +}; +static_assert(std::is_trivially_copyable_v<PortInfo>, + "UDP Request PortInfo is not trivially copyable"); + +/** + * Request the latest pad information from the server. If the server hasn't received this message + * from the client in a reasonable time frame, the server will stop sending updates. The default + * timeout seems to be 5 seconds. + */ +struct PadData { + enum class Flags : u8 { + AllPorts, + Id, + Mac, + }; + /// Determines which method will be used as a look up for the controller + Flags flags{}; + /// Index of the port of the controller to retrieve data about + u8 port_id{}; + /// Mac address of the controller to retrieve data about + MacAddress mac; +}; +static_assert(sizeof(PadData) == 8, "UDP Request PadData struct has wrong size"); +static_assert(std::is_trivially_copyable_v<PadData>, + "UDP Request PadData is not trivially copyable"); + +/** + * Creates a message with the proper header data that can be sent to the server. 
+ * @param T data Request body to send + * @param client_id ID of the udp client (usually not checked on the server) + */ +template <typename T> +Message<T> Create(const T data, const u32 client_id = 0) { + boost::crc_32_type crc; + Header header{ + CLIENT_MAGIC, PROTOCOL_VERSION, sizeof(T) + sizeof(Type), 0, client_id, GetMessageType<T>(), + }; + Message<T> message{header, data}; + crc.process_bytes(&message, sizeof(Message<T>)); + message.header.crc = crc.checksum(); + return message; +} +} // namespace Request + +namespace Response { + +struct Version { + u16_le version{}; +}; +static_assert(sizeof(Version) == 2, "UDP Response Version struct has wrong size"); +static_assert(std::is_trivially_copyable_v<Version>, + "UDP Response Version is not trivially copyable"); + +struct PortInfo { + u8 id{}; + u8 state{}; + u8 model{}; + u8 connection_type{}; + MacAddress mac; + u8 battery{}; + u8 is_pad_active{}; +}; +static_assert(sizeof(PortInfo) == 12, "UDP Response PortInfo struct has wrong size"); +static_assert(std::is_trivially_copyable_v<PortInfo>, + "UDP Response PortInfo is not trivially copyable"); + +#pragma pack(push, 1) +struct PadData { + PortInfo info{}; + u32_le packet_counter{}; + + u16_le digital_button{}; + // The following union isn't trivially copyable but we don't use this input anyway. + // union DigitalButton { + // u16_le button; + // BitField<0, 1, u16> button_1; // Share + // BitField<1, 1, u16> button_2; // L3 + // BitField<2, 1, u16> button_3; // R3 + // BitField<3, 1, u16> button_4; // Options + // BitField<4, 1, u16> button_5; // Up + // BitField<5, 1, u16> button_6; // Right + // BitField<6, 1, u16> button_7; // Down + // BitField<7, 1, u16> button_8; // Left + // BitField<8, 1, u16> button_9; // L2 + // BitField<9, 1, u16> button_10; // R2 + // BitField<10, 1, u16> button_11; // L1 + // BitField<11, 1, u16> button_12; // R1 + // BitField<12, 1, u16> button_13; // Triangle + // BitField<13, 1, u16> button_14; // Circle + // BitField<14, 1, u16> button_15; // Cross + // BitField<15, 1, u16> button_16; // Square + // } digital_button; + + u8 home; + /// If the device supports a "click" on the touchpad, this will change to 1 when a click happens + u8 touch_hard_press{}; + u8 left_stick_x{}; + u8 left_stick_y{}; + u8 right_stick_x{}; + u8 right_stick_y{}; + + struct AnalogButton { + u8 button_8{}; + u8 button_7{}; + u8 button_6{}; + u8 button_5{}; + u8 button_12{}; + u8 button_11{}; + u8 button_10{}; + u8 button_9{}; + u8 button_16{}; + u8 button_15{}; + u8 button_14{}; + u8 button_13{}; + } analog_button; + + struct TouchPad { + u8 is_active{}; + u8 id{}; + u16_le x{}; + u16_le y{}; + } touch_1, touch_2; + + u64_le motion_timestamp; + + struct Accelerometer { + float x{}; + float y{}; + float z{}; + } accel; + + struct Gyroscope { + float pitch{}; + float yaw{}; + float roll{}; + } gyro; +}; +#pragma pack(pop) + +static_assert(sizeof(PadData) == 80, "UDP Response PadData struct has wrong size "); +static_assert(std::is_trivially_copyable_v<PadData>, + "UDP Response PadData is not trivially copyable"); + +static_assert(sizeof(Message<PadData>) == MAX_PACKET_SIZE, + "UDP MAX_PACKET_SIZE is no longer larger than Message<PadData>"); + +static_assert(sizeof(PadData::AnalogButton) == 12, + "UDP Response AnalogButton struct has wrong size "); +static_assert(sizeof(PadData::TouchPad) == 6, "UDP Response TouchPad struct has wrong size "); +static_assert(sizeof(PadData::Accelerometer) == 12, + "UDP Response Accelerometer struct has wrong size "); 
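// Spelled out, the framing arithmetic behind the asserts around here: a full PadData
// response occupies sizeof(Header) + sizeof(PadData) = 20 + 80 = 100 bytes, exactly
// MAX_PACKET_SIZE, and its header advertises payload_length = sizeof(PadData) +
// sizeof(Type) = 80 + 4 = 84, which is the value Response::Validate() checks in
// protocol.cpp.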
+static_assert(sizeof(PadData::Gyroscope) == 12, "UDP Response Gyroscope struct has wrong size "); + +/** + * Create a Response Message from the data + * @param data array of bytes sent from the server + * @return boost::none if it failed to parse or Type if it succeeded. The client can then safely + * copy the data into the appropriate struct for that Type + */ +std::optional<Type> Validate(u8* data, std::size_t size); + +} // namespace Response + +template <> +constexpr Type GetMessageType<Request::Version>() { + return Type::Version; +} +template <> +constexpr Type GetMessageType<Request::PortInfo>() { + return Type::PortInfo; +} +template <> +constexpr Type GetMessageType<Request::PadData>() { + return Type::PadData; +} +template <> +constexpr Type GetMessageType<Response::Version>() { + return Type::Version; +} +template <> +constexpr Type GetMessageType<Response::PortInfo>() { + return Type::PortInfo; +} +template <> +constexpr Type GetMessageType<Response::PadData>() { + return Type::PadData; +} +} // namespace InputCommon::CemuhookUDP diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp new file mode 100644 index 000000000..ca99cc22f --- /dev/null +++ b/src/input_common/udp/udp.cpp @@ -0,0 +1,98 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <mutex> +#include <tuple> + +#include "common/param_package.h" +#include "core/frontend/input.h" +#include "core/settings.h" +#include "input_common/udp/client.h" +#include "input_common/udp/udp.h" + +namespace InputCommon::CemuhookUDP { + +class UDPTouchDevice final : public Input::TouchDevice { +public: + explicit UDPTouchDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} + std::tuple<float, float, bool> GetStatus() const override { + std::lock_guard guard(status->update_mutex); + return status->touch_status; + } + +private: + std::shared_ptr<DeviceStatus> status; +}; + +class UDPMotionDevice final : public Input::MotionDevice { +public: + explicit UDPMotionDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override { + std::lock_guard guard(status->update_mutex); + return status->motion_status; + } + +private: + std::shared_ptr<DeviceStatus> status; +}; + +class UDPTouchFactory final : public Input::Factory<Input::TouchDevice> { +public: + explicit UDPTouchFactory(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} + + std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { + { + std::lock_guard guard(status->update_mutex); + status->touch_calibration.emplace(); + // These default values work well for DS4 but probably not other touch inputs + status->touch_calibration->min_x = params.Get("min_x", 100); + status->touch_calibration->min_y = params.Get("min_y", 50); + status->touch_calibration->max_x = params.Get("max_x", 1800); + status->touch_calibration->max_y = params.Get("max_y", 850); + } + return std::make_unique<UDPTouchDevice>(status); + } + +private: + std::shared_ptr<DeviceStatus> status; +}; + +class UDPMotionFactory final : public Input::Factory<Input::MotionDevice> { +public: + explicit UDPMotionFactory(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} + + std::unique_ptr<Input::MotionDevice> Create(const Common::ParamPackage& params) override { + return std::make_unique<UDPMotionDevice>(status); + } + +private: + 
std::shared_ptr<DeviceStatus> status; +}; + +State::State() { + auto status = std::make_shared<DeviceStatus>(); + client = + std::make_unique<Client>(status, Settings::values.udp_input_address, + Settings::values.udp_input_port, Settings::values.udp_pad_index); + + Input::RegisterFactory<Input::TouchDevice>("cemuhookudp", + std::make_shared<UDPTouchFactory>(status)); + Input::RegisterFactory<Input::MotionDevice>("cemuhookudp", + std::make_shared<UDPMotionFactory>(status)); +} + +State::~State() { + Input::UnregisterFactory<Input::TouchDevice>("cemuhookudp"); + Input::UnregisterFactory<Input::MotionDevice>("cemuhookudp"); +} + +void State::ReloadUDPClient() { + client->ReloadSocket(Settings::values.udp_input_address, Settings::values.udp_input_port, + Settings::values.udp_pad_index); +} + +std::unique_ptr<State> Init() { + return std::make_unique<State>(); +} +} // namespace InputCommon::CemuhookUDP diff --git a/src/input_common/udp/udp.h b/src/input_common/udp/udp.h new file mode 100644 index 000000000..4f83f0441 --- /dev/null +++ b/src/input_common/udp/udp.h @@ -0,0 +1,25 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> + +namespace InputCommon::CemuhookUDP { + +class Client; + +class State { +public: + State(); + ~State(); + void ReloadUDPClient(); + +private: + std::unique_ptr<Client> client; +}; + +std::unique_ptr<State> Init(); + +} // namespace InputCommon::CemuhookUDP diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ccfed4f2e..4b0c6346f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,12 +29,15 @@ add_library(video_core STATIC gpu_synch.h gpu_thread.cpp gpu_thread.h + guest_driver.cpp + guest_driver.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp memory_manager.h morton.cpp morton.h + query_cache.h rasterizer_accelerated.cpp rasterizer_accelerated.h rasterizer_cache.cpp @@ -72,6 +75,8 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_query_cache.cpp + renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h @@ -154,6 +159,7 @@ if (ENABLE_VULKAN) renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h renderer_vulkan/renderer_vulkan.h + renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp renderer_vulkan/vk_blit_screen.h renderer_vulkan/vk_buffer_cache.cpp @@ -174,6 +180,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_query_cache.cpp + renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0510ed777..186aca61d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -101,7 +101,10 @@ public: void TickFrame() { ++epoch; while (!pending_destruction.empty()) { - if (pending_destruction.front()->GetEpoch() + 1 > epoch) { + // Delay at least 4 frames before destruction. + // This is due to triple buffering happening on some drivers. 
+ static constexpr u64 epochs_to_destroy = 5; + if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { break; } pending_destruction.pop_front(); diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 44b8b8d22..d56a47710 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -106,6 +107,9 @@ public: virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const = 0; virtual u32 GetBoundBuffer() const = 0; + + virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; + virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 110406f2f..4b824aa4e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con return result; } +VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + +const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 4ef3e0613..eeb79c56f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -218,6 +218,10 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 58dfa8033..b28de1092 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,17 +4,21 @@ #include <cinttypes> #include <cstring> +#include <optional> #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { +using VideoCore::QueryType; + /// First register id that is actually a Macro call. 
constexpr u32 MacroRegistersStart = 0xE00; @@ -399,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryCondition(); break; } + case MAXWELL3D_REG_INDEX(counter_reset): { + ProcessCounterReset(); + break; + } case MAXWELL3D_REG_INDEX(sync_info): { ProcessSyncPoint(); break; @@ -481,7 +489,7 @@ void Maxwell3D::FlushMMEInlineDraw() { const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { - rasterizer.DrawMultiBatch(is_indexed); + rasterizer.Draw(is_indexed, true); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -519,61 +527,51 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { + struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. + if (long_query) { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); + } +} +void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: + StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); - } - - // TODO(Subv): Research and implement how query sync conditions work. - - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); - - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write<u32>(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. 
- LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + case Regs::QueryOperation::Acquire: + // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that + // matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + case Regs::QueryOperation::Counter: + if (const std::optional<u64> result = GetQueryResult()) { + // If the query returns an empty optional it means it's cached and deferred. + // In this case we have a non-empty result, so we stamp it immediately. + StampQueryResult(*result, regs.query.query_get.short_query == 0); } break; - } + case Regs::QueryOperation::Trap: + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast<u32>(regs.query.query_get.mode.Value())); + UNIMPLEMENTED_MSG("Unknown query operation"); + break; } } @@ -590,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() { } case Regs::ConditionMode::ResNonZero: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; break; } case Regs::ConditionMode::Equal: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; break; } case Regs::ConditionMode::NotEqual: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; break; @@ -616,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() { } } +void Maxwell3D::ProcessCounterReset() { + switch (regs.counter_reset) { + case Regs::CounterReset::SampleCnt: + rasterizer.ResetCounter(QueryType::SamplesPassed); + break; + default: + LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", + static_cast<int>(regs.counter_reset)); + break; + } +} + void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); @@ -644,7 +654,7 @@ void Maxwell3D::DrawArrays() { const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; if (ShouldExecute()) { - rasterizer.DrawBatch(is_indexed); + rasterizer.Draw(is_indexed, false); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -658,6 +668,22 @@ void Maxwell3D::DrawArrays() { } } +std::optional<u64> Maxwell3D::GetQueryResult() { + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + return 0; + case Regs::QuerySelect::SamplesPassed: + // Deferred. 
+ rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); + return {}; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); + return 1; + } +} + void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. auto& shader = state.shader_stages[stage_index]; @@ -784,4 +810,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b return result; } +VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + +const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ee79260fc..26939be3f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include <array> #include <bitset> +#include <optional> #include <type_traits> #include <unordered_map> #include <vector> @@ -71,12 +72,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. - Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -410,6 +410,27 @@ public: Linear = 1, }; + enum class CounterReset : u32 { + SampleCnt = 0x01, + Unk02 = 0x02, + Unk03 = 0x03, + Unk04 = 0x04, + EmittedPrimitives = 0x10, // Not tested + Unk11 = 0x11, + Unk12 = 0x12, + Unk13 = 0x13, + Unk15 = 0x15, + Unk16 = 0x16, + Unk17 = 0x17, + Unk18 = 0x18, + Unk1A = 0x1A, + Unk1B = 0x1B, + Unk1C = 0x1C, + Unk1D = 0x1D, + Unk1E = 0x1E, + GeneratedPrimitives = 0x1F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -704,8 +725,8 @@ public: INSERT_UNION_PADDING_WORDS(0x15); s32 stencil_back_func_ref; - u32 stencil_back_func_mask; u32 stencil_back_mask; + u32 stencil_back_func_mask; INSERT_UNION_PADDING_WORDS(0xC); @@ -858,11 +879,19 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_UNION_PADDING_WORDS(0x1); + u32 samplecnt_enable; float point_size; - INSERT_UNION_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x1); + + u32 point_sprite_enable; + + INSERT_UNION_PADDING_WORDS(0x3); + + CounterReset counter_reset; + + INSERT_UNION_PADDING_WORDS(0x1); u32 zeta_enable; @@ -1077,7 +1106,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1306,6 +1335,10 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array<u32, 0x40000>; @@ -1405,9 +1438,15 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); - // Handles Conditional Rendering + /// Writes the query result accordingly. 
+ void StampQueryResult(u64 payload, bool long_query); + + /// Handles conditional rendering. void ProcessQueryCondition(); + /// Handles counter resets. + void ProcessCounterReset(); + /// Handles writes to syncing register. void ProcessSyncPoint(); @@ -1424,6 +1463,9 @@ private: // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); + + /// Returns a query's value or an empty object if the value will be deferred through a cache. + std::optional<u64> GetQueryResult(); }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1454,8 +1496,8 @@ ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); ASSERT_REG_POSITION(patch_vertices, 0x373); ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); -ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6); -ASSERT_REG_POSITION(stencil_back_mask, 0x3D7); +ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); +ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(depth_bounds, 0x3E7); @@ -1489,7 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); +ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); +ASSERT_REG_POSITION(point_sprite_enable, 0x548); +ASSERT_REG_POSITION(counter_reset, 0x54C); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6f98bd827..c9bc83cd7 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -227,6 +227,28 @@ enum class AtomicOp : u64 { Exch = 8, }; +enum class GlobalAtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + SafeAdd = 10, +}; + +enum class GlobalAtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32_FTZ_RN = 3, + F16x2_FTZ_RN = 4, + S64 = 5, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -602,6 +624,19 @@ enum class ShuffleOperation : u64 { Bfly = 3, // shuffleXorNV }; +enum class ShfType : u64 { + Bits32 = 0, + U64 = 2, + S64 = 3, +}; + +enum class ShfXmode : u64 { + None = 0, + HI = 1, + X = 2, + XHI = 3, +}; + union Instruction { constexpr Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -754,6 +789,13 @@ union Instruction { } shr; union { + BitField<37, 2, ShfType> type; + BitField<48, 2, ShfXmode> xmode; + BitField<50, 1, u64> wrap; + BitField<20, 6, u64> immediate; + } shf; + + union { BitField<39, 5, u64> shift_amount; BitField<48, 1, u64> negate_b; BitField<49, 1, u64> negate_a; @@ -958,6 +1000,12 @@ union Instruction { } stg; union { + BitField<52, 4, GlobalAtomicOp> operation; + BitField<49, 3, GlobalAtomicType> type; + BitField<28, 20, s64> offset; + } atom; + + union { BitField<52, 4, AtomicOp> operation; BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; @@ -1096,6 +1144,11 @@ union Instruction { } fset; union { + BitField<47, 1, u64> ftz; + BitField<48, 4, PredCondition> cond; + } fcmp; + + union { BitField<49, 1, u64> bf; BitField<35, 3, PredCondition> cond; BitField<50, 1, u64> ftz; @@ -1624,11 +1677,11 @@ union Instruction { } xmad; union { - BitField<20, 14, u64> offset; + 
BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; u64 GetOffset() const { - return offset * 4; + return shifted_offset * 4; } } cbuf34; @@ -1675,6 +1728,7 @@ public: BFE_C, BFE_R, BFE_IMM, + BFI_RC, BFI_IMM_R, BRA, BRX, @@ -1690,6 +1744,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1771,6 +1826,7 @@ public: ICMP_R, ICMP_CR, ICMP_IMM, + FCMP_R, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -1994,6 +2050,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), @@ -2074,6 +2131,7 @@ private: INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), + INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), @@ -2098,6 +2156,7 @@ private: INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), + INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b9c5c41a2..7d7137109 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::GetTicks() const { + // This values were reversed engineered by fincs from NVN + // The gpu clock is reported in units of 385/625 nanoseconds + constexpr u64 gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), 
&block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock<std::mutex> LockSync() { return std::unique_lock{sync_mutex}; } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 08dc96bb3..882e2d9c7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -86,7 +86,7 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; + using CommandQueue = Common::MPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; std::atomic<u64> signaled_fence{}; diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp new file mode 100644 index 000000000..6adef459e --- /dev/null +++ b/src/video_core/guest_driver.cpp @@ -0,0 +1,36 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <limits> + +#include "video_core/guest_driver.h" + +namespace VideoCore { + +void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { + if (texture_handler_size_deduced) { + return; + } + const std::size_t size = bound_offsets.size(); + if (size < 2) { + return; + } + std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); + u32 min_val = std::numeric_limits<u32>::max(); + for (std::size_t i = 1; i < size; ++i) { + if (bound_offsets[i] == bound_offsets[i - 1]) { + continue; + } + const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; + min_val = std::min(min_val, new_min); + } + if (min_val > 2) { + return; + } + texture_handler_size_deduced = true; + texture_handler_size = min_texture_handler_size * min_val; +} + +} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h new file mode 100644 index 000000000..fc1917347 --- /dev/null +++ b/src/video_core/guest_driver.h @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace VideoCore { + +/** + * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect + * information necessary for impossible to avoid HLE methods like shader tracks as they are + * Entscheidungsproblems. + */ +class GuestDriverProfile { +public: + void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); + + u32 GetTextureHandlerSize() const { + return texture_handler_size; + } + + bool TextureHandlerSizeKnown() const { + return texture_handler_size_deduced; + } + +private: + // Minimum size of texture handler any driver can use. + static constexpr u32 min_texture_handler_size = 4; + // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily + // use 4 bytes instead. Thus, certain drivers may squish the size. 
+ static constexpr u32 default_texture_handler_size = 8; + + u32 texture_handler_size = default_texture_handler_size; + bool texture_handler_size_deduced = false; +}; + +} // namespace VideoCore diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 11848fbce..f5d33f27a 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -9,6 +9,7 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -84,7 +85,9 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); - rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); + UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() ->VMManager() @@ -242,6 +245,8 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { const u8* src_ptr{page_table.pointers[page_index] + page_offset}; + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); std::memcpy(dest_buffer, src_ptr, copy_amount); break; @@ -292,6 +297,8 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { u8* dest_ptr{page_table.pointers[page_index] + page_offset}; + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); std::memcpy(dest_ptr, src_buffer, copy_amount); break; @@ -339,6 +346,8 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std:: switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is copied (even when in asynchronous GPU mode). const u8* src_ptr{page_table.pointers[page_index] + page_offset}; rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); WriteBlock(dest_addr, src_ptr, copy_amount); diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..e66054ed0 --- /dev/null +++ b/src/video_core/query_cache.h @@ -0,0 +1,359 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
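// Worked example for GuestDriverProfile::DeduceTextureHandlerSize() in guest_driver.cpp
// above (the offsets are illustrative and assumed to be expressed in units of the 4-byte
// minimum handler size): sorted bound offsets {16, 18, 20} have a smallest gap of 2, so
// the handler size is deduced as 4 * 2 = 8 bytes; offsets {16, 17, 18} give a gap of 1
// and shrink it to 4 bytes; any smallest gap above 2 leaves the default of 8 untouched
// and the deduction is retried on a later call.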
+ +#pragma once + +#include <algorithm> +#include <array> +#include <cstring> +#include <iterator> +#include <memory> +#include <mutex> +#include <optional> +#include <unordered_map> +#include <vector> + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template <class QueryCache, class HostCounter> +class CounterStreamBase { +public: + explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) + : cache{cache}, type{type} {} + + /// Updates the state of the stream, enabling or disabling as needed. + void Update(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } + } + + /// Resets the stream to zero. It doesn't disable the query after resetting. + void Reset() { + if (current) { + current->EndQuery(); + + // Immediately start a new query to avoid disabling its state. + current = cache.Counter(nullptr, type); + } + last = nullptr; + } + + /// Returns the current counter slicing as needed. + std::shared_ptr<HostCounter> Current() { + if (!current) { + return nullptr; + } + current->EndQuery(); + last = std::move(current); + current = cache.Counter(last, type); + return last; + } + + /// Returns true when the counter stream is enabled. + bool IsEnabled() const { + return current != nullptr; + } + +private: + /// Enables the stream. + void Enable() { + if (current) { + return; + } + current = cache.Counter(last, type); + } + + // Disables the stream. + void Disable() { + if (current) { + current->EndQuery(); + } + last = std::exchange(current, nullptr); + } + + QueryCache& cache; + const VideoCore::QueryType type; + + std::shared_ptr<HostCounter> current; + std::shared_ptr<HostCounter> last; +}; + +template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, + class QueryPool> +class QueryCacheBase { +public: + explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ + static_cast<QueryCache&>(*this), + VideoCore::QueryType::SamplesPassed}}} {} + + void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; + FlushAndRemoveRegion(addr, size); + } + + void FlushRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; + FlushAndRemoveRegion(addr, size); + } + + /** + * Records a query in GPU mapped memory, potentially marked with a timestamp. + * @param gpu_addr GPU address to flush to when the mapped memory is read. + * @param type Query type, e.g. SamplesPassed. + * @param timestamp Timestamp, when empty the flushed query is assumed to be short. + */ + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { + std::unique_lock lock{mutex}; + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + } + + query->BindCounter(Stream(type).Current(), timestamp); + } + + /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 
+ void UpdateCounters() { + std::unique_lock lock{mutex}; + const auto& regs = system.GPU().Maxwell3D().regs; + Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); + } + + /// Resets a counter to zero. It doesn't disable the query after resetting. + void ResetCounter(VideoCore::QueryType type) { + std::unique_lock lock{mutex}; + Stream(type).Reset(); + } + + /// Disable all active streams. Expected to be called at the end of a command buffer. + void DisableStreams() { + std::unique_lock lock{mutex}; + for (auto& stream : streams) { + stream.Update(false); + } + } + + /// Returns a new host counter. + std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) { + return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), + type); + } + + /// Returns the counter stream of the specified type. + CounterStream& Stream(VideoCore::QueryType type) { + return streams[static_cast<std::size_t>(type)]; + } + + /// Returns the counter stream of the specified type. + const CounterStream& Stream(VideoCore::QueryType type) const { + return streams[static_cast<std::size_t>(type)]; + } + +protected: + std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; + +private: + /// Flushes a memory range to guest memory and removes it from the cache. + void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast<u64>(addr); + const u64 addr_end = addr_begin + static_cast<u64>(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_end = cache_begin + query.SizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + query.Flush(); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } + } + + /// Registers the passed parameters as cached and returns a pointer to the stored cached query. + CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { + rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); + const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, + host_ptr); + } + + /// Tries to a get a cached query. Returns nullptr on failure. + CachedQuery* TryGet(CacheAddr addr) { + const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCacheAddr() == addr; }); + return found != std::end(contents) ? 
+ }
+
+ static constexpr std::uintptr_t PAGE_SIZE = 4096;
+ static constexpr unsigned PAGE_SHIFT = 12;
+
+ Core::System& system;
+ VideoCore::RasterizerInterface& rasterizer;
+
+ std::recursive_mutex mutex;
+
+ std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
+
+ std::array<CounterStream, VideoCore::NumQueryTypes> streams;
+};
+
+template <class QueryCache, class HostCounter>
+class HostCounterBase {
+public:
+ explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
+ : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
+ // Avoid nesting too many dependencies, which could overflow the stack when they are deleted.
+ constexpr u64 depth_threshold = 96;
+ if (depth > depth_threshold) {
+ depth = 0;
+ base_result = dependency->Query();
+ dependency = nullptr;
+ }
+ }
+ virtual ~HostCounterBase() = default;
+
+ /// Returns the current value of the query.
+ u64 Query() {
+ if (result) {
+ return *result;
+ }
+
+ u64 value = BlockingQuery() + base_result;
+ if (dependency) {
+ value += dependency->Query();
+ dependency = nullptr;
+ }
+
+ result = value;
+ return *result;
+ }
+
+ /// Returns true when flushing this query will potentially wait.
+ bool WaitPending() const noexcept {
+ return result.has_value();
+ }
+
+ u64 Depth() const noexcept {
+ return depth;
+ }
+
+protected:
+ /// Returns the value of the query from the backend API, blocking as needed.
+ virtual u64 BlockingQuery() const = 0;
+
+private:
+ std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
+ std::optional<u64> result; ///< Filled with the already returned value.
+ u64 depth; ///< Number of nested dependencies.
+ u64 base_result = 0; ///< Accumulated value of collapsed dependencies.
+};
+
+template <class HostCounter>
+class CachedQueryBase {
+public:
+ explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
+ : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+ virtual ~CachedQueryBase() = default;
+
+ CachedQueryBase(CachedQueryBase&&) noexcept = default;
+ CachedQueryBase(const CachedQueryBase&) = delete;
+
+ CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
+ CachedQueryBase& operator=(const CachedQueryBase&) = delete;
+
+ /// Flushes the query to guest memory.
+ virtual void Flush() {
+ // When counter is nullptr it means the query has just been reset. We are supposed to write
+ // a zero in these cases.
+ const u64 value = counter ? counter->Query() : 0;
+ std::memcpy(host_ptr, &value, sizeof(u64));
+
+ if (timestamp) {
+ std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+ }
+ }
+
+ /// Binds a counter to this query.
+ void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+ if (counter) {
+ // If there's an old counter set, it means the query is being rewritten by the game.
+ // To avoid losing the data forever, flush here.
+ Flush();
+ }
+ counter = std::move(counter_);
+ timestamp = timestamp_;
+ }
+
+ VAddr CpuAddr() const noexcept {
+ return cpu_addr;
+ }
+
+ CacheAddr GetCacheAddr() const noexcept {
+ return ToCacheAddr(host_ptr);
+ }
+
+ u64 SizeInBytes() const noexcept {
+ return SizeInBytes(timestamp.has_value());
+ }
+
+ static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
+ return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+ }
+
+protected:
+ /// Returns true when querying the counter may potentially block.
+ bool WaitPending() const noexcept { + return counter && counter->WaitPending(); + } + +private: + static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. + static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. + static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. + + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. + std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. +}; + +} // namespace VideoCommon diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5b0eca9e2..f18eaf4bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,9 +6,11 @@ #include <atomic> #include <functional> +#include <optional> #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -16,6 +18,11 @@ class MemoryManager; namespace VideoCore { +enum class QueryType { + SamplesPassed, +}; +constexpr std::size_t NumQueryTypes = 1; + enum class LoadCallbackStage { Prepare, Decompile, @@ -28,11 +35,8 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - /// Draw the current batch of vertex arrays - virtual bool DrawBatch(bool is_indexed) = 0; - - /// Draw the current batch of multiple instances of vertex arrays - virtual bool DrawMultiBatch(bool is_indexed) = 0; + /// Dispatches a draw invocation + virtual void Draw(bool is_indexed, bool is_instanced) = 0; /// Clear the current framebuffer virtual void Clear() = 0; @@ -40,6 +44,12 @@ public: /// Dispatches a compute shader invocation virtual void DispatchCompute(GPUVAddr code_addr) = 0; + /// Resets the counter of a query + virtual void ResetCounter(QueryType type) = 0; + + /// Records a GPU query and caches it + virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; + /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; @@ -78,5 +88,18 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, const DiskResourceLoadCallback& callback = {}) {} + + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. + GuestDriverProfile& AccessGuestDriverProfile() { + return guest_driver_profile; + } + + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. + const GuestDriverProfile& AccessGuestDriverProfile() const { + return guest_driver_profile; + } + +private: + GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp new file mode 100644 index 000000000..f12e9f55f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -0,0 +1,120 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
+
+namespace OpenGL {
+
+namespace {
+
+constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
+
+constexpr GLenum GetTarget(VideoCore::QueryType type) {
+ return QueryTargets[static_cast<std::size_t>(type)];
+}
+
+} // Anonymous namespace
+
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+ : VideoCommon::QueryCacheBase<
+ QueryCache, CachedQuery, CounterStream, HostCounter,
+ std::vector<OGLQuery>>{system,
+ static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
+ gl_rasterizer{gl_rasterizer} {}
+
+QueryCache::~QueryCache() = default;
+
+OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
+ auto& reserve = query_pools[static_cast<std::size_t>(type)];
+ OGLQuery query;
+ if (reserve.empty()) {
+ query.Create(GetTarget(type));
+ return query;
+ }
+
+ query = std::move(reserve.back());
+ reserve.pop_back();
+ return query;
+}
+
+void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
+ query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
+}
+
+bool QueryCache::AnyCommandQueued() const noexcept {
+ return gl_rasterizer.AnyCommandQueued();
+}
+
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type)
+ : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+ type{type}, query{cache.AllocateQuery(type)} {
+ glBeginQuery(GetTarget(type), query.handle);
+}
+
+HostCounter::~HostCounter() {
+ cache.Reserve(type, std::move(query));
+}
+
+void HostCounter::EndQuery() {
+ if (!cache.AnyCommandQueued()) {
+ // There is a chance this query will be waited on without any command queued before it
+ // (glDraw, glClear, glDispatch), and waiting in that situation can hang. glFlush is
+ // considered a command, so insert a flush into the OpenGL command stream to make the wait safe.
+ glFlush();
+ }
+ glEndQuery(GetTarget(type));
+}
+
+u64 HostCounter::BlockingQuery() const {
+ GLint64 value;
+ glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
+ return static_cast<u64>(value);
+}
+
+CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
+ : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+
+CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
+ : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
+
+CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+ VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
+ cache = rhs.cache;
+ type = rhs.type;
+ return *this;
+}
+
+void CachedQuery::Flush() {
+ // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+ // To avoid this, disable and re-enable the stream, keeping its dependency chain intact.
+ // This is only needed when there are pending waits to be done.
+ auto& stream = cache->Stream(type); + const bool slice_counter = WaitPending() && stream.IsEnabled(); + if (slice_counter) { + stream.Update(false); + } + + VideoCommon::CachedQueryBase<HostCounter>::Flush(); + + if (slice_counter) { + stream.Update(true); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h new file mode 100644 index 000000000..d8e7052a1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -0,0 +1,78 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Core { +class System; +} + +namespace OpenGL { + +class CachedQuery; +class HostCounter; +class QueryCache; +class RasterizerOpenGL; + +using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; + +class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, + HostCounter, std::vector<OGLQuery>> { +public: + explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + ~QueryCache(); + + OGLQuery AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, OGLQuery&& query); + + bool AnyCommandQueued() const noexcept; + +private: + RasterizerOpenGL& gl_rasterizer; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { +public: + explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + QueryCache& cache; + const VideoCore::QueryType type; + OGLQuery query; +}; + +class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { +public: + explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, + u8* host_ptr); + CachedQuery(CachedQuery&& rhs) noexcept; + CachedQuery(const CachedQuery&) = delete; + + CachedQuery& operator=(CachedQuery&& rhs) noexcept; + CachedQuery& operator=(const CachedQuery&) = delete; + + void Flush() override; + +private: + QueryCache* cache; + VideoCore::QueryType type; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c428f06e4..e1965fb21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -55,16 +56,20 @@ namespace { template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type) { + Tegra::Engines::ShaderType shader_type, + std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); return 
engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 offset = + entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(shader_type, entry.GetOffset()); + return engine.GetStageTexture(shader_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -88,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -244,9 +249,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { } GLintptr RasterizerOpenGL::SetupIndexBuffer() { - if (accelerate_draw != AccelDraw::Indexed) { - return 0; - } MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); @@ -540,10 +542,16 @@ void RasterizerOpenGL::Clear() { } else if (use_stencil) { glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); } + + ++num_queued_commands; } -void RasterizerOpenGL::DrawPrelude() { +void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { + MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -563,9 +571,6 @@ void RasterizerOpenGL::DrawPrelude() { buffer_cache.Acquire(); - // Draw the vertex batch - const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - std::size_t buffer_size = CalculateVertexArraysSize(); // Add space for index buffer @@ -592,7 +597,11 @@ void RasterizerOpenGL::DrawPrelude() { // Upload vertex and index data. SetupVertexBuffer(vao); SetupVertexInstances(vao); - index_buffer_offset = SetupIndexBuffer(); + + GLintptr index_buffer_offset; + if (is_indexed) { + index_buffer_offset = SetupIndexBuffer(); + } // Prepare packed bindings. bind_ubo_pushbuffer.Setup(); @@ -608,7 +617,7 @@ void RasterizerOpenGL::DrawPrelude() { // Setup shaders and their used resources. texture_cache.GuardSamplers(true); - const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); SetupShaders(primitive_mode); texture_cache.GuardSamplers(false); @@ -626,6 +635,7 @@ void RasterizerOpenGL::DrawPrelude() { // As all cached buffers are invalidated, we need to recheck their state. 
gpu.dirty.ResetVertexArrays(); } + gpu.dirty.memory_general = false; shader_program_manager->ApplyTo(state); state.Apply(); @@ -633,107 +643,46 @@ void RasterizerOpenGL::DrawPrelude() { if (texture_cache.TextureBarrier()) { glTextureBarrier(); } -} -struct DrawParams { - bool is_indexed{}; - bool is_instanced{}; - GLenum primitive_mode{}; - GLint count{}; - GLint base_vertex{}; - - // Indexed settings - GLenum index_format{}; - GLintptr index_buffer_offset{}; - - // Instanced setting - GLint num_instances{}; - GLint base_instance{}; - - void DispatchDraw() { - if (is_indexed) { - const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); - if (is_instanced) { - glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, - index_buffer_ptr, num_instances, - base_vertex, base_instance); - } else { - glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, - base_vertex); - } + ++num_queued_commands; + + const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); + const GLsizei num_instances = + static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); + if (is_indexed) { + const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); + const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); + const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); + const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); + if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { + glDrawElements(primitive_mode, num_vertices, format, offset); + } else if (num_instances == 1 && base_instance == 0) { + glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex); + } else if (base_vertex == 0 && base_instance == 0) { + glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances); + } else if (base_vertex == 0) { + glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset, + num_instances, base_instance); + } else if (base_instance == 0) { + glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset, + num_instances, base_vertex); } else { - if (is_instanced) { - glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances, - base_instance); - } else { - glDrawArrays(primitive_mode, base_vertex, count); - } + glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format, + offset, num_instances, base_vertex, + base_instance); } - } -}; - -bool RasterizerOpenGL::DrawBatch(bool is_indexed) { - accelerate_draw = is_indexed ? 
AccelDraw::Indexed : AccelDraw::Arrays; - - MICROPROFILE_SCOPE(OpenGL_Drawing); - - DrawPrelude(); - - auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& regs = maxwell3d.regs; - const auto current_instance = maxwell3d.state.current_instance; - DrawParams draw_call{}; - draw_call.is_indexed = is_indexed; - draw_call.num_instances = static_cast<GLint>(1); - draw_call.base_instance = static_cast<GLint>(current_instance); - draw_call.is_instanced = current_instance > 0; - draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); - if (draw_call.is_indexed) { - draw_call.count = static_cast<GLint>(regs.index_array.count); - draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); - draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - draw_call.index_buffer_offset = index_buffer_offset; } else { - draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); - draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); - } - draw_call.DispatchDraw(); - - maxwell3d.dirty.memory_general = false; - accelerate_draw = AccelDraw::Disabled; - return true; -} - -bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { - accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - - MICROPROFILE_SCOPE(OpenGL_Drawing); - - DrawPrelude(); - - auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& regs = maxwell3d.regs; - const auto& draw_setup = maxwell3d.mme_draw; - DrawParams draw_call{}; - draw_call.is_indexed = is_indexed; - draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count); - draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance); - draw_call.is_instanced = draw_setup.instance_count > 1; - draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); - if (draw_call.is_indexed) { - draw_call.count = static_cast<GLint>(regs.index_array.count); - draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); - draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - draw_call.index_buffer_offset = index_buffer_offset; - } else { - draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); - draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); + const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); + const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); + if (num_instances == 1 && base_instance == 0) { + glDrawArrays(primitive_mode, base_vertex, num_vertices); + } else if (base_instance == 0) { + glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances); + } else { + glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, + num_instances, base_instance); + } } - draw_call.DispatchDraw(); - - maxwell3d.dirty.memory_general = false; - accelerate_draw = AccelDraw::Disabled; - return true; } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { @@ -776,6 +725,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyProgramPipeline(); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); + ++num_queued_commands; +} + +void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional<u64> timestamp) { + query_cache.Query(gpu_addr, type, timestamp); } void RasterizerOpenGL::FlushAll() {} @@ -787,6 +746,7 @@ void 
RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -797,6 +757,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { @@ -807,10 +768,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushCommands() { + // Only flush when we have commands queued to OpenGL. + if (num_queued_commands == 0) { + return; + } + num_queued_commands = 0; glFlush(); } void RasterizerOpenGL::TickFrame() { + // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. + num_queued_commands = 0; + buffer_cache.TickFrame(); } @@ -942,8 +911,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); + SetupTexture(binding++, texture, entry); + } + } } } @@ -952,8 +928,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); + SetupTexture(binding++, texture, entry); + } + } } } @@ -1273,6 +1258,7 @@ void RasterizerOpenGL::SyncPointState() { // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). 
state.point.program_control = regs.vp_point_size.enable != 0; + state.point.sprite = regs.point_sprite_enable != 0; state.point.size = std::max(1.0f, regs.point_size); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6a27cf497..68abe9a21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -57,10 +58,11 @@ public: ScreenInfo& info); ~RasterizerOpenGL() override; - bool DrawBatch(bool is_indexed) override; - bool DrawMultiBatch(bool is_indexed) override; + void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -75,6 +77,11 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + /// Returns true when there are commands queued to the OpenGL server. + bool AnyCommandQueued() const { + return num_queued_commands > 0; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -102,9 +109,6 @@ private: void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size); - /// Syncs all the state, shaders, render targets and textures setting before a draw call. - void DrawPrelude(); - /// Configures the current textures to use for the draw command. 
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
@@ -180,10 +184,23 @@ private:
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
- /// Check for extension that are not strictly required
- /// but are needed for correct emulation
+ /// Check for extensions that are not strictly required but are needed for correct emulation
void CheckExtensions();
+ std::size_t CalculateVertexArraysSize() const;
+
+ std::size_t CalculateIndexBufferSize() const;
+
+ /// Updates and returns a vertex array object representing current vertex format
+ GLuint SetupVertexFormat();
+
+ void SetupVertexBuffer(GLuint vao);
+ void SetupVertexInstances(GLuint vao);
+
+ GLintptr SetupIndexBuffer();
+
+ void SetupShaders(GLenum primitive_mode);
+
const Device device;
OpenGLState state;
@@ -191,6 +208,7 @@ private:
ShaderCacheOpenGL shader_cache;
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;
+ QueryCache query_cache;
Core::System& system;
ScreenInfo& screen_info;
@@ -208,24 +226,8 @@ private:
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
- std::size_t CalculateVertexArraysSize() const;
-
- std::size_t CalculateIndexBufferSize() const;
-
- /// Updates and returns a vertex array object representing current vertex format
- GLuint SetupVertexFormat();
-
- void SetupVertexBuffer(GLuint vao);
- void SetupVertexInstances(GLuint vao);
-
- GLintptr SetupIndexBuffer();
-
- GLintptr index_buffer_offset;
-
- void SetupShaders(GLenum primitive_mode);
-
- enum class AccelDraw { Disabled, Arrays, Indexed };
- AccelDraw accelerate_draw = AccelDraw::Disabled;
+ /// Number of commands queued to the OpenGL driver. Reset on flush.
+ std::size_t num_queued_commands = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 5c96c1d46..f0ddfb276 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -207,4 +207,21 @@ void OGLFramebuffer::Release() { handle = 0; } +void OGLQuery::Create(GLenum target) { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glCreateQueries(target, 1, &handle); +} + +void OGLQuery::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteQueries(1, &handle); + handle = 0; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 3a85a1d4c..514d1d165 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -266,4 +266,29 @@ public: GLuint handle = 0; }; +class OGLQuery : private NonCopyable { +public: + OGLQuery() = default; + + OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLQuery() { + Release(); + } + + OGLQuery& operator=(OGLQuery&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(GLenum target); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c5bdd377..489eb143c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s } void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + locker.SetBoundBuffer(usage.bound_buffer); for (const auto& key : usage.keys) { const auto [buffer, offset] = key.first; locker.InsertKey(buffer, offset, key.second); @@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), + return ShaderDiskCacheUsage{unique_identifier, variant, + locker.GetBoundBuffer(), locker.GetKeys(), locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2996aaf08..4735000b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -391,6 +391,7 @@ public: DeclareVertex(); DeclareGeometry(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareInternalFlags(); @@ -503,6 +504,16 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); + } + if (num_custom_variables > 0) { + code.AddNewLine(); + } + } + void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { @@ -655,7 +666,8 @@ private: u32 
binding = device.GetBaseBindings(stage).sampler; for (const auto& sampler : ir.GetSamplers()) { const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding++); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + binding += sampler.IsIndexed() ? sampler.Size() : 1; std::string sampler_type = [&]() { if (sampler.IsBuffer()) { @@ -682,7 +694,11 @@ private: sampler_type += "Shadow"; } - code.AddLine("{} {} {};", description, sampler_type, name); + if (!sampler.IsIndexed()) { + code.AddLine("{} {} {};", description, sampler_type, name); + } else { + code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); + } } if (!ir.GetSamplers().empty()) { code.AddNewLine(); @@ -775,6 +791,11 @@ private: return {GetRegister(index), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {GetCustomVariable(index), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { @@ -1019,7 +1040,6 @@ private: } return {{"gl_ViewportIndex", Type::Int}}; case 3: - UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); return {{"gl_PointSize", Type::Float}}; } return {}; @@ -1099,7 +1119,11 @@ private: } else if (!meta->ptp.empty()) { expr += "Offsets"; } - expr += '(' + GetSampler(meta->sampler) + ", "; + if (!meta->sampler.IsIndexed()) { + expr += '(' + GetSampler(meta->sampler) + ", "; + } else { + expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; + } expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow && !separate_dc ? 
1 : 0) - 1); expr += '('; @@ -1311,6 +1335,8 @@ private: const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), Type::Uint}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = {GetCustomVariable(cv->GetIndex()), Type::Float}; } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1858,10 +1884,7 @@ private: template <const std::string_view& opname, Type type> Expression Atomic(Operation operation) { - ASSERT(stage == ShaderType::Compute); - auto& smem = std::get<SmemNode>(*operation[0]); - - return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), + return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).As(type)), type}; } @@ -2241,6 +2264,10 @@ private: return GetDeclarationWithSuffix(index, "gpr"); } + std::string GetCustomVariable(u32 index) const { + return GetDeclarationWithSuffix(index, "custom_var"); + } + std::string GetPredicate(Tegra::Shader::Pred pred) const { return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index cf874a09a..1fc204f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,7 +53,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 11; +constexpr u32 NativeVersion = 12; // Making sure sizes doesn't change by accident static_assert(sizeof(ProgramVariant) == 20); @@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 num_bound_samplers{}; u32 num_bindless_samplers{}; if (file.ReadArray(&usage.unique_identifier, 1) != 1 || - file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || + file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { LOG_ERROR(Render_OpenGL, error_loading); @@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; if (!LoadObjectFromPrecompiled(usage.unique_identifier) || - !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(usage.variant) || + !LoadObjectFromPrecompiled(usage.bound_buffer) || + !LoadObjectFromPrecompiled(num_keys) || !LoadObjectFromPrecompiled(num_bound_samplers) || !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; @@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(usage.bound_buffer) != 1 || file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { @@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p }; if (!SaveObjectToPrecompiled(usage.unique_identifier) || - !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(usage.variant) || 
!SaveObjectToPrecompiled(usage.bound_buffer) || !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 69a2fbdda..ef2371f6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>); struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + u32 bound_buffer{}; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index cc185e9e1..ab1f7983c 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() { void OpenGLState::ApplyPointSize() { Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); + Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite); if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 678e5cd89..4953eeda2 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -132,6 +132,7 @@ public: struct { bool program_control = false; // GL_PROGRAM_POINT_SIZE + bool sprite = false; // GL_POINT_SPRITE GLfloat size = 1.0f; // GL_POINT_SIZE } point; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e95eb069e..d4b81cd87 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) { return GL_NONE; } +GLenum GetComponent(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8Z24: + return is_first ? 
GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { if (params.IsBuffer()) { return; @@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } @@ -416,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou if (new_swizzle == swizzle) return; swizzle = new_swizzle; - const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), - GetSwizzleSource(z_source), - GetSwizzleSource(w_source)}; + const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), + GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; const GLuint handle = GetTexture(); - glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + const PixelFormat format = surface.GetSurfaceParams().pixel_format; + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + case PixelFormat::S8Z24: + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + GetComponent(format, x_source == SwizzleSource::R)); + break; + default: + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + break; + } } OGLTextureView CachedSurfaceView::CreateTextureView() const { @@ -529,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), + static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), + static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), + static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), + buffers, is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? 
GL_LINEAR : GL_NEAREST); } diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ea4f35663..7ed505628 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -47,8 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } case Maxwell::VertexAttribute::Type::SignedInt: @@ -72,8 +71,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } case Maxwell::VertexAttribute::Type::Float: @@ -89,13 +87,19 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::UnsignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8: + return GL_UNSIGNED_BYTE; + default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); return {}; } } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 331808113..5403c3ab7 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -164,7 +164,7 @@ struct FormatTuple { {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 {vk::Format::eUndefined, {}}, // BGRA8_SRGB {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB - {vk::Format::eUndefined, {}}, // DXT23_SRGB + {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U @@ -363,6 +363,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return vk::Format::eR8G8B8A8Uint; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return vk::Format::eR32G32B32A32Uint; default: break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp new file mode 100644 index 000000000..d5032b432 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -0,0 +1,265 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <memory> +#include <optional> +#include <vector> + +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/telemetry.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/frontend/emu_window.h" +#include "core/memory.h" +#include "core/perf_stats.h" +#include "core/settings.h" +#include "core/telemetry_session.h" +#include "video_core/gpu.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +namespace Vulkan { + +namespace { + +VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, + VkDebugUtilsMessageTypeFlagsEXT type, + const VkDebugUtilsMessengerCallbackDataEXT* data, + [[maybe_unused]] void* user_data) { + const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_}; + const char* message{data->pMessage}; + + if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { + LOG_CRITICAL(Render_Vulkan, "{}", message); + } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) { + LOG_WARNING(Render_Vulkan, "{}", message); + } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) { + LOG_INFO(Render_Vulkan, "{}", message); + } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) { + LOG_DEBUG(Render_Vulkan, "{}", message); + } + return VK_FALSE; +} + +std::string GetReadableVersion(u32 version) { + return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), + VK_VERSION_PATCH(version)); +} + +std::string GetDriverVersion(const VKDevice& device) { + // Extracted from + // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 + const u32 version = device.GetDriverVersion(); + + if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + const u32 major = (version >> 22) & 0x3ff; + const u32 minor = (version >> 14) & 0x0ff; + const u32 secondary = (version >> 6) & 0x0ff; + const u32 tertiary = version & 0x003f; + return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); + } + if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) { + const u32 major = version >> 14; + const u32 minor = version & 0x3fff; + return fmt::format("{}.{}", major, minor); + } + + return GetReadableVersion(version); +} + +std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) { + std::sort(std::begin(available_extensions), std::end(available_extensions)); + + static constexpr std::size_t AverageExtensionSize = 64; + std::string separated_extensions; + separated_extensions.reserve(available_extensions.size() * AverageExtensionSize); + + const auto end = std::end(available_extensions); + for (auto extension = std::begin(available_extensions); extension != end; ++extension) { + if (const bool is_last = extension + 1 == end; is_last) { + separated_extensions += *extension; + } else { + separated_extensions += fmt::format("{},", *extension); + } + } + return separated_extensions; +} + +} // Anonymous namespace + 
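Editorial aside on GetDriverVersion above: the NVIDIA branch assumes that vendor's packed version layout of 10 bits major, 8 bits minor, 8 bits secondary and 6 bits tertiary, while drivers without a special case fall back to GetReadableVersion and the standard VK_VERSION_MAJOR/MINOR/PATCH packing. A minimal standalone sketch of the NVIDIA decode follows; the raw constant and the printed result are made-up values for illustration only, not taken from any real driver.

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical packed value: (442 << 22) | (59 << 14), i.e. a "442.59.0.0"-style version.
    const std::uint32_t version = 1854849024u;
    const unsigned major = (version >> 22) & 0x3ffu;    // top 10 bits
    const unsigned minor = (version >> 14) & 0x0ffu;    // next 8 bits
    const unsigned secondary = (version >> 6) & 0x0ffu; // next 8 bits
    const unsigned tertiary = version & 0x003fu;        // low 6 bits
    std::printf("%u.%u.%u.%u\n", major, minor, secondary, tertiary); // prints "442.59.0.0"
    return 0;
}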
+RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system) + : RendererBase(window), system{system} {} + +RendererVulkan::~RendererVulkan() { + ShutDown(); +} + +void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + const auto& layout = render_window.GetFramebufferLayout(); + if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) { + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { + swapchain->Create(layout.width, layout.height, is_srgb); + blit_screen->Recreate(); + } + + scheduler->WaitWorker(); + + swapchain->AcquireNextImage(); + const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated); + + scheduler->Flush(false, render_semaphore); + + if (swapchain->Present(render_semaphore, fence)) { + blit_screen->Recreate(); + } + + render_window.SwapBuffers(); + rasterizer->TickFrame(); + } + + render_window.PollEvents(); +} + +bool RendererVulkan::Init() { + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; + render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); + const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); + + std::optional<vk::DebugUtilsMessengerEXT> callback; + if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) { + callback = CreateDebugCallback(dldi); + if (!callback) { + return false; + } + } + + if (!PickDevices(dldi)) { + if (callback) { + instance.destroy(*callback, nullptr, dldi); + } + return false; + } + debug_callback = UniqueDebugUtilsMessengerEXT( + *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>( + instance, nullptr, device->GetDispatchLoader())); + + Report(); + + memory_manager = std::make_unique<VKMemoryManager>(*device); + + resource_manager = std::make_unique<VKResourceManager>(*device); + + const auto& framebuffer = render_window.GetFramebufferLayout(); + swapchain = std::make_unique<VKSwapchain>(surface, *device); + swapchain->Create(framebuffer.width, framebuffer.height, false); + + scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); + + rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, + *resource_manager, *memory_manager, *scheduler); + + blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, + *resource_manager, *memory_manager, *swapchain, + *scheduler, screen_info); + + return true; +} + +void RendererVulkan::ShutDown() { + if (!device) { + return; + } + const auto dev = device->GetLogical(); + const auto& dld = device->GetDispatchLoader(); + if (dev && dld.vkDeviceWaitIdle) { + dev.waitIdle(dld); + } + + rasterizer.reset(); + blit_screen.reset(); + scheduler.reset(); + swapchain.reset(); + memory_manager.reset(); + resource_manager.reset(); + device.reset(); +} + +std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( + const vk::DispatchLoaderDynamic& dldi) { + const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( + {}, + vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose, + 
vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | + vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | + vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, + &DebugCallback, nullptr); + vk::DebugUtilsMessengerEXT callback; + if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != + vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); + return {}; + } + return callback; +} + +bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { + const auto devices = instance.enumeratePhysicalDevices(dldi); + + // TODO(Rodrigo): Choose device from config file + const s32 device_index = Settings::values.vulkan_device; + if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { + LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); + return false; + } + const vk::PhysicalDevice physical_device = devices[device_index]; + + if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { + return false; + } + + device = std::make_unique<VKDevice>(dldi, physical_device, surface); + return device->Create(dldi, instance); +} + +void RendererVulkan::Report() const { + const std::string vendor_name{device->GetVendorName()}; + const std::string model_name{device->GetModelName()}; + const std::string driver_version = GetDriverVersion(*device); + const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); + + const std::string api_version = GetReadableVersion(device->GetApiVersion()); + + const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); + + LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); + LOG_INFO(Render_Vulkan, "Device: {}", model_name); + LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); + + auto& telemetry_session = system.TelemetrySession(); + constexpr auto field = Telemetry::FieldType::UserSystem; + telemetry_session.AddField(field, "GPU_Vendor", vendor_name); + telemetry_session.AddField(field, "GPU_Model", model_name); + telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); + telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version); + telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); +} + +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 939eebe83..d1da4f9d3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,8 +104,11 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.depthBiasClamp = true; features.geometryShader = true; features.tessellationShader = true; + features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; + features.shaderStorageImageReadWithoutFormat = + is_shader_storage_img_read_without_format_supported; features.shaderStorageImageWriteWithoutFormat = true; features.textureCompressionASTC_LDR = is_optimal_astc_supported; @@ -117,6 +120,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan bit8_storage.uniformAndStorageBuffer8BitAccess = true; SetNext(next, bit8_storage); + vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.hostQueryReset = true; + SetNext(next, host_query_reset); + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8.shaderFloat16 = true; @@ -273,6 +280,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; std::bitset<required_extensions.size()> available_extensions{}; @@ -340,6 +348,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, @@ -376,7 +385,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(13); + extensions.reserve(14); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -384,6 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); + extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); @@ -400,8 +410,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - Test(extension, nv_device_diagnostic_checkpoints, - VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); + if (Settings::values.renderer_debug) { + Test(extension, nv_device_diagnostic_checkpoints, + VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); + } } if 
(khr_shader_float16_int8) { @@ -455,6 +467,8 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { const auto supported_features{physical.getFeatures(dldi)}; + is_shader_storage_img_read_without_format_supported = + supported_features.shaderStorageImageReadWithoutFormat; is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } @@ -528,6 +542,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eBc6HUfloatBlock, vk::Format::eBc6HSfloatBlock, vk::Format::eBc1RgbaSrgbBlock, + vk::Format::eBc2SrgbBlock, vk::Format::eBc3SrgbBlock, vk::Format::eBc7SrgbBlock, vk::Format::eAstc4x4SrgbBlock, diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 72603f9f6..2c27ad730 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns true if Shader storage Image Read Without Format supported. + bool IsShaderStorageImageReadWithoutFormatSupported() const { + return is_shader_storage_img_read_without_format_supported; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -227,6 +232,8 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. + bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage + ///< image read without format // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 48e23d4cd..7ddf7d3ee 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,9 +325,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { specialization.tessellation.primitive = fixed_state.tessellation.primitive; specialization.tessellation.spacing = fixed_state.tessellation.spacing; specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; - for (const auto& rt : key.renderpass_params.color_attachments) { - specialization.enabled_rendertargets.set(rt.index); - } SPIRVProgram program; std::vector<vk::DescriptorSetLayoutBinding> bindings; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 000000000..ffbf60dda --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +namespace { + +constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; + +constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { + return QUERY_TARGETS[static_cast<std::size_t>(type)]; +} + +} // Anonymous namespace + +QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} + +QueryPool::~QueryPool() = default; + +void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { + device = &device_; + type = type_; +} + +std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { + std::size_t index; + do { + index = CommitResource(fence); + } while (usage[index]); + usage[index] = true; + + return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; +} + +void QueryPool::Allocate(std::size_t begin, std::size_t end) { + usage.resize(end); + + const auto dev = device->GetLogical(); + const u32 size = static_cast<u32>(end - begin); + const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); + pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); +} + +void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { + const auto it = + std::find_if(std::begin(pools), std::end(pools), + [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + ASSERT(it != std::end(pools)); + + const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); + usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; +} + +VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler) + : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, + QueryPool>{system, rasterizer}, + device{device}, scheduler{scheduler} { + for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { + query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); + } +} + +VKQueryCache::~VKQueryCache() = default; + +std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { + return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); +} + +void VKQueryCache::Reserve(VideoCore::QueryType type, + std::pair<vk::QueryPool, std::uint32_t> query) { + query_pools[static_cast<std::size_t>(type)].Reserve(query); +} + +HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + const auto dev = cache.Device().GetLogical(); + cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { + dev.resetQueryPoolEXT(query.first, query.second, 1, dld); + cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + }); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, query); +} + +void HostCounter::EndQuery() { + cache.Scheduler().Record([query = query](auto cmdbuf, auto& 
dld) { + cmdbuf.endQuery(query.first, query.second, dld); + }); +} + +u64 HostCounter::BlockingQuery() const { + if (ticks >= cache.Scheduler().Ticks()) { + cache.Scheduler().Flush(); + } + + const auto dev = cache.Device().GetLogical(); + const auto& dld = cache.Device().GetDispatchLoader(); + u64 value; + dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), + vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); + return value; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 000000000..c3092ee96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -0,0 +1,104 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class CachedQuery; +class HostCounter; +class VKDevice; +class VKQueryCache; +class VKScheduler; + +using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; + +class QueryPool final : public VKFencedPool { +public: + explicit QueryPool(); + ~QueryPool() override; + + void Initialize(const VKDevice& device, VideoCore::QueryType type); + + std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); + + void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + static constexpr std::size_t GROW_STEP = 512; + + const VKDevice* device = nullptr; + VideoCore::QueryType type = {}; + + std::vector<UniqueQueryPool> pools; + std::vector<bool> usage; +}; + +class VKQueryCache final + : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, + QueryPool> { +public: + explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler); + ~VKQueryCache(); + + std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); + + const VKDevice& Device() const noexcept { + return device; + } + + VKScheduler& Scheduler() const noexcept { + return scheduler; + } + +private: + const VKDevice& device; + VKScheduler& scheduler; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { +public: + explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + VKQueryCache& cache; + const VideoCore::QueryType type; + const std::pair<vk::QueryPool, std::uint32_t> query; + const u64 ticks; +}; + +class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { +public: + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d2c6b1189..31c078f6a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -289,25 +289,19 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool), pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device) {} - -RasterizerVulkan::~RasterizerVulkan() = default; - -bool RasterizerVulkan::DrawBatch(bool is_indexed) { - Draw(is_indexed, false); - return true; + sampler_cache(device), query_cache(system, *this, device, scheduler) { + scheduler.SetQueryCache(query_cache); } -bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { - Draw(is_indexed, true); - return true; -} +RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Drawing); FlushWork(); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; @@ -362,6 +356,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; @@ -429,6 +425,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { sampled_views.clear(); image_views.clear(); + query_cache.UpdateCounters(); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ComputePipelineCacheKey key{ code_addr, @@ -471,17 +469,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { }); } +void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional<u64> timestamp) { + query_cache.Query(gpu_addr, type, timestamp); +} + void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { @@ -571,7 +580,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { - texceptions.set(rt); + texceptions[rt] = true; } } @@ -579,7 +588,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { zeta_attachment = texture_cache.GetDepthBufferSurface(true); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { - texceptions.set(ZETA_TEXCEPTION_INDEX); + texceptions[ZETA_TEXCEPTION_INDEX] = true; } texture_cache.GuardRenderTargets(false); @@ -1122,11 +1131,12 @@ RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) { const auto& rendertarget = regs.rt[rt]; - if (rendertarget.Address() 
== 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) + if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { continue; + } renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), - texceptions.test(rt)}); + texceptions[rt]}); } renderpass_params.has_zeta = regs.zeta_enable; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7be71e734..138903d60 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -96,7 +97,7 @@ struct ImageView { vk::ImageLayout* layout = nullptr; }; -class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, VKScreenInfo& screen_info, const VKDevice& device, @@ -104,10 +105,11 @@ public: VKScheduler& scheduler); ~RasterizerVulkan() override; - bool DrawBatch(bool is_indexed) override; - bool DrawMultiBatch(bool is_indexed) override; + void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -140,8 +142,6 @@ private: static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; - void Draw(bool is_indexed, bool is_instanced); - void FlushWork(); Texceptions UpdateAttachments(); @@ -247,6 +247,7 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; + VKQueryCache query_cache; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 0a8ec8398..204b7c39c 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -23,7 +23,14 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> } else if (color == std::array<float, 4>{1, 1, 1, 1}) { return vk::BorderColor::eFloatOpaqueWhite; } else { - return {}; + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return vk::BorderColor::eFloatOpaqueWhite; + } + if (color[3] > 0.5f) { + return vk::BorderColor::eFloatOpaqueBlack; + } + return vk::BorderColor::eFloatTransparentBlack; } } @@ -37,8 +44,6 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const auto border_color{tsc.GetBorderColor()}; const auto vk_border_color{TryConvertBorderColor(border_color)}; - UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented 
border color {} {} {} {}", - border_color[0], border_color[1], border_color[2], border_color[3]); constexpr bool unnormalized_coords{false}; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1..92bd6c344 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { } void VKScheduler::AllocateNewContext() { + ++ticks; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, device.GetDispatchLoader()); + // Enable counters once again. These are disabled when a command buffer is finished. + if (query_cache) { + query_cache->UpdateCounters(); + } } void VKScheduler::InvalidateState() { @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { } void VKScheduler::EndPendingOperations() { + query_cache->DisableStreams(); EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0..62fd7858b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@ #pragma once +#include <atomic> #include <condition_variable> #include <memory> #include <optional> @@ -18,6 +19,7 @@ namespace Vulkan { class VKDevice; class VKFence; +class VKQueryCache; class VKResourceManager; class VKFenceView { @@ -67,6 +69,11 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(vk::Pipeline pipeline); + /// Assigns the query cache. + void SetQueryCache(VKQueryCache& query_cache_) { + query_cache = &query_cache_; + } + /// Returns true when viewports have been set in the current command buffer. bool TouchViewports() { return std::exchange(state.viewports, true); @@ -112,6 +119,11 @@ public: return current_fence; } + /// Returns the current command buffer tick. 
+ u64 Ticks() const { + return ticks; + } + private: class Command { public: @@ -205,6 +217,8 @@ private: const VKDevice& device; VKResourceManager& resource_manager; + VKQueryCache* query_cache = nullptr; + vk::CommandBuffer current_cmdbuf; VKFence* current_fence = nullptr; VKFence* next_fence = nullptr; @@ -227,6 +241,7 @@ private: Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; std::mutex mutex; std::condition_variable cv; + std::atomic<u64> ticks = 0; bool quit = false; }; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index dd6d2ef03..6d0bf6aa1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -275,12 +275,14 @@ public: AddCapability(spv::Capability::ImageGatherExtended); AddCapability(spv::Capability::SampledBuffer); AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + AddCapability(spv::Capability::DrawParameters); AddCapability(spv::Capability::SubgroupBallotKHR); AddCapability(spv::Capability::SubgroupVoteKHR); AddExtension("SPV_KHR_shader_ballot"); AddExtension("SPV_KHR_subgroup_vote"); AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_variable_pointers"); + AddExtension("SPV_KHR_shader_draw_parameters"); if (ir.UsesViewportIndex()) { AddCapability(spv::Capability::MultiViewport); @@ -290,6 +292,10 @@ public: } } + if (device.IsShaderStorageImageReadWithoutFormatSupported()) { + AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } + if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } @@ -353,6 +359,7 @@ private: DeclareFragment(); DeclareCompute(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareSharedMemory(); @@ -491,9 +498,11 @@ private: interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_uint, "vertex_index"); + vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_uint, "instance_index"); + DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); + base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); + base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); } void DeclareTessControl() { @@ -542,11 +551,10 @@ private: return; } - for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { - if (!specialization.enabled_rendertargets[rt]) { + for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) { + if (!IsRenderTargetEnabled(rt)) { continue; } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); Name(id, fmt::format("frag_color{}", rt)); Decorate(id, spv::Decoration::Location, rt); @@ -587,6 +595,15 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); + Name(id, fmt::format("custom_var_{}", i)); + custom_variables.emplace(i, AddGlobalVariable(id)); + } + } + void DeclarePredicates() { for (const auto pred : ir.GetPredicates()) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); @@ -852,6 
+869,15 @@ private: return binding; } + bool IsRenderTargetEnabled(u32 rt) const { + for (u32 component = 0; component < 4; ++component) { + if (header.ps.IsColorComponentOutputEnabled(rt, component)) { + return true; + } + } + return false; + } + bool IsInputAttributeArray() const { return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || stage == ShaderType::Geometry; @@ -974,6 +1000,11 @@ private: return {OpLoad(t_float, registers.at(index)), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { return {Constant(t_uint, immediate->GetValue()), Type::Uint}; } @@ -1045,9 +1076,12 @@ private: return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), Type::Float}; case 2: - return {OpLoad(t_uint, instance_index), Type::Uint}; + return { + OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), + Type::Int}; case 3: - return {OpLoad(t_uint, vertex_index), Type::Uint}; + return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), + Type::Int}; } UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); return {Constant(t_uint, 0U), Type::Uint}; @@ -1115,15 +1149,7 @@ private: } if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - - Id offset = OpISub(t_uint, real, base); - offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); - return {OpLoad(t_float, - OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), - Type::Float}; + return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; } if (const auto lmem = std::get_if<LmemNode>(&*node)) { @@ -1134,10 +1160,7 @@ private: } if (const auto smem = std::get_if<SmemNode>(&*node)) { - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); - return {OpLoad(t_uint, pointer), Type::Uint}; + return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1331,20 +1354,13 @@ private: target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; + target = {GetSharedMemoryPointer(*smem), Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), - Type::Float}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = 
{custom_variables.at(cv->GetIndex()), Type::Float}; } else { UNIMPLEMENTED(); @@ -1743,8 +1759,16 @@ private: } Expression ImageLoad(Operation operation) { - UNIMPLEMENTED(); - return {}; + if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { + return {v_float_zero, Type::Float}; + } + + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; + + const Id coords = GetCoordinates(operation, Type::Int); + const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); + + return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; } Expression ImageStore(Operation operation) { @@ -1796,11 +1820,16 @@ private: return {}; } - Expression UAtomicAdd(Operation operation) { - const auto& smem = std::get<SmemNode>(*operation[0]); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); + Expression AtomicAdd(Operation operation) { + Id pointer; + if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { + pointer = GetSharedMemoryPointer(*smem); + } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { + pointer = GetGlobalMemoryPointer(*gmem); + } else { + UNREACHABLE(); + return {Constant(t_uint, 0), Type::Uint}; + } const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id semantics = Constant(t_uint, 0U); @@ -1889,19 +1918,14 @@ private: // rendertargets/components are skipped in the register assignment. u32 current_reg = 0; for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - if (!specialization.enabled_rendertargets[rt]) { - // Skip rendertargets that are not enabled - continue; - } // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { - const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component); - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - OpStore(pointer, SafeGetRegister(current_reg)); - ++current_reg; - } else { - OpStore(pointer, component == 3 ? 
v_float_one : v_float_zero); + if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { + continue; } + const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); + OpStore(pointer, SafeGetRegister(current_reg)); + ++current_reg; } } if (header.ps.omap.depth) { @@ -2240,6 +2264,22 @@ private: return {}; } + Id GetGlobalMemoryPointer(const GmemNode& gmem) { + const Id real = AsUint(Visit(gmem.GetRealAddress())); + const Id base = AsUint(Visit(gmem.GetBaseAddress())); + const Id diff = OpISub(t_uint, real, base); + const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + const Id buffer = global_buffers.at(gmem.GetDescriptor()); + return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); + } + + Id GetSharedMemoryPointer(const SmemNode& smem) { + ASSERT(stage == ShaderType::Compute); + Id address = AsUint(Visit(smem.GetAddress())); + address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); + return OpAccessChain(t_smem_uint, shared_memory, address); + } + static constexpr std::array operation_decompilers = { &SPIRVDecompiler::Assign, @@ -2386,7 +2426,7 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::UAtomicAdd, + &SPIRVDecompiler::AtomicAdd, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, @@ -2482,9 +2522,9 @@ private: Id t_smem_uint{}; - const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); + const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); + Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); const Id t_gmem_struct = MemberDecorate( Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); @@ -2505,6 +2545,7 @@ private: Id out_vertex{}; Id in_vertex{}; std::map<u32, Id> registers; + std::map<u32, Id> custom_variables; std::map<Tegra::Shader::Pred, Id> predicates; std::map<u32, Id> flow_variables; Id local_memory{}; @@ -2520,6 +2561,8 @@ private: Id instance_index{}; Id vertex_index{}; + Id base_instance{}; + Id base_vertex{}; std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; Id frag_depth{}; Id frag_coord{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 10794be1c..f5dc14d9e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -102,9 +102,6 @@ struct Specialization final { Maxwell::TessellationSpacing spacing{}; bool clockwise{}; } tessellation; - - // Fragment specific - std::bitset<8> enabled_rendertargets; }; // Old gcc versions don't consider this trivially copyable. 
// static_assert(std::is_trivially_copyable_v<Specialization>); diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index a2f0044ba..cca13bcde 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -65,8 +65,8 @@ public: void DetachSegment(ASTNode start, ASTNode end); void Remove(ASTNode node); - ASTNode first{}; - ASTNode last{}; + ASTNode first; + ASTNode last; }; class ASTProgram { @@ -299,9 +299,9 @@ private: friend class ASTZipper; ASTData data; - ASTNode parent{}; - ASTNode next{}; - ASTNode previous{}; + ASTNode parent; + ASTNode next; + ASTNode previous; ASTZipper* manager{}; }; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle return value; } +std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes bindless_samplers.insert_or_assign({buffer, offset}, sampler); } +void ConstBufferLocker::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + bool ConstBufferLocker::IsConsistent() const { if (!engine) { return false; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -10,6 +10,7 @@ #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" namespace VideoCommon::Shader { @@ -40,6 +41,8 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + std::optional<u32> ObtainBoundBuffer(); + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -49,6 +52,9 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Set the bound buffer for this locker. + void SetBoundBuffer(u32 buffer); + /// Checks keys and samplers against engine's current const buffers. Returns true if they are /// the same value, false otherwise; bool IsConsistent() const; @@ -71,12 +77,27 @@ public: return bindless_samplers; } + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. 
+ VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { + if (engine) { + return &engine->AccessGuestDriverProfile(); + } + return nullptr; + } + private: const Tegra::Engines::ShaderType stage; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <cstring> +#include <limits> #include <set> #include <fmt/format.h> @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); + return; + } + if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + return; + } + u32 count{}; + std::vector<u32> bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + ++count; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + } +} + +std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, + VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return std::nullopt; + } + const u32 base_offset = sampler_to_deduce.GetOffset(); + u32 max_offset{std::numeric_limits<u32>::max()}; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + if (sampler.GetOffset() > base_offset) { + max_offset = std::min(sampler.GetOffset(), max_offset); + } + } + if (max_offset == std::numeric_limits<u32>::max()) { + return std::nullopt; + } + return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); +} + } // Anonymous namespace class ASTDecoder { @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } +void ShaderIR::PostDecode() { + // Deduce texture handler size if needed + auto gpu_driver = locker.AccessGuestDriverProfile(); + DeduceTextureHandlerSize(gpu_driver, used_samplers); + // Deduce Indexed Samplers + if (!uses_indexed_samplers) { + return; + } + for (auto& sampler : used_samplers) { + if (!sampler.IsIndexed()) { + continue; + } + if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { + sampler.SetSize(*size); + } else { + LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); + sampler.SetSize(1); + } + } +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index fcedd2af6..90240c765 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() -> Node { + Node op_b = [&] { if (instr.is_b_imm) { return GetImmediate19(instr); } else if (instr.is_b_gpr) { @@ -141,6 +141,15 @@ u32 
ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FCMP_R: { + UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); + Node op_c = GetRegister(instr.gpr39); + Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); + SetRegister( + bb, instr.gpr0, + Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); + break; + } case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 371fae127..21366869d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_R: return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::ICMP_IMM: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: @@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod const Node one = Immediate(1); const Node two = Immediate(2); - Node value{}; + Node value; for (u32 i = 0; i < lop_iterations; ++i) { const Node shift_amount = Immediate(i); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 8be1119df..70d1c055b 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -17,10 +17,13 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { + const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { + case OpCode::Id::BFI_RC: + return {GetRegister(instr.gpr39), + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::BFI_IMM_R: - return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; + return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: UNREACHABLE(); return {Immediate(0), Immediate(0)}; diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 0eeb75559..6ead42070 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -83,14 +83,14 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const bool input_signed = instr.conversion.is_input_signed; - if (instr.conversion.src_size == Register::Size::Byte) { - const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; - if (offset > 0) { - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset)); + if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { + ASSERT(instr.conversion.src_size == Register::Size::Byte || + instr.conversion.src_size == 
Register::Size::Short); + if (instr.conversion.src_size == Register::Size::Short) { + ASSERT(offset == 0 || offset == 2); } - } else { - UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); + value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, + std::move(value), Immediate(offset * 8)); } value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7591a715f..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,9 +19,12 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::StoreType; namespace { @@ -61,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { } } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), + Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), + std::move(offset), Immediate(size)); +} + +Node Sign16Extend(Node value) { + Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); + return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -136,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); [[fallthrough]]; case OpCode::Id::LD_S: { - const auto GetMemory = [&](s32 offset) { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - immediate_offset); - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) - : GetLocalMemory(address); + return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); + }; + const auto GetMemory = [&](s32 offset) { + return opcode->get().GetId() == OpCode::Id::LD_S ? 
GetSharedMemory(GetAddress(offset)) + : GetLocalMemory(GetAddress(offset)); }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: - case Tegra::Shader::StoreType::Bits64: - case Tegra::Shader::StoreType::Bits128: { - const u32 count = [&]() { + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, + Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); + break; + case StoreType::Bits32: + case StoreType::Bits64: + case StoreType::Bits128: { + const u32 count = [&] { switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: return 1; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: return 2; - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: return 4; default: UNREACHABLE(); @@ -212,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { // To handle unaligned loads get the bytes used to dereference global memory and extract // those bytes from the loaded u32. if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), - std::move(offset), Immediate(size)); + gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); } SetTemporary(bb, i, gmem); @@ -269,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; - const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L - ? &ShaderIR::SetLocalMemory - : &ShaderIR::SetSharedMemory; + const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; + const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; + const auto get_memory = is_local ? 
&ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Signed16: { + Node address = GetAddress(0); + Node memory = (this->*get_memory)(address); + (this->*set_memory)( + bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); + break; + } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); @@ -323,18 +354,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node value = GetRegister(instr.gpr0.Value() + i); if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, - Immediate(size)); + const u32 mask = GetUnalignedMask(type); + value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::ATOM: { + UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", + static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", + static_cast<int>(instr.atom.type.Value())); + + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. 
+ break; + } + + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::ATOMS: { UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", static_cast<int>(instr.atoms.operation.Value())); @@ -348,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node memory = GetSharedMemory(std::move(address)); Node data = GetRegister(instr.gpr20); - Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); + Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 7321698b2..4944e9d69 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -69,13 +69,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case OpCode::Id::MOV_SYS: { const Node value = [this, instr] { switch (instr.sys20) { + case SystemVariable::LaneId: + LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); + return Immediate(0U); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); - return Immediate(0u); + return Immediate(0U); case SystemVariable::Tid: { Node value = Immediate(0); value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); @@ -188,7 +191,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", static_cast<u32>(cc)); - if (disable_flow_stack) { + if (decompiled) { break; } @@ -200,7 +203,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", static_cast<u32>(cc)); - if (disable_flow_stack) { + if (decompiled) { break; } diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index d419e9c45..3b391d3e6 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -10,8 +10,80 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::ShfType; +using Tegra::Shader::ShfXmode; + +namespace { + +Node IsFull(Node shift) { + return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); +} + +Node Shift(OperationCode opcode, Node value, Node shift) { + Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); + Node shifted = Operation(opcode, move(value), shift); + return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); +} + +Node ClampShift(Node shift, s32 size = 32) { + shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); + return Operation(OperationCode::IMin, move(shift), Immediate(size)); +} + +Node WrapShift(Node shift, s32 size = 32) { + return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); +} + +Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { + // These 
values are used when the shift value is less than 32 + Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); + Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); + Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); + + if (type == ShfType::Bits32) { + // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits + return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); + } + + // And these when it's larger than or 32 + const bool is_signed = type == ShfType::S64; + const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); + Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); + Node greater = Shift(opcode, high, move(reduced)); + + Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); + Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); + + Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); + return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); +} + +Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { + // These values are used when the shift value is less than 32 + Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); + Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); + Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); + + if (type == ShfType::Bits32) { + // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits + return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); + } + + // And these when it's larger than or 32 + Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); + Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); + + Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); + Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); + + Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); + return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); +} + +} // Anonymous namespace u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -28,29 +100,48 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { } }(); - switch (opcode->get().GetId()) { + switch (const auto opid = opcode->get().GetId(); opid) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - if (instr.shr.wrap) { - op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f)); - } else { - op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); - op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); - } + op_b = instr.shr.wrap ? 
WrapShift(move(op_b)) : ClampShift(move(op_b)); Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - std::move(op_a), std::move(op_b)); + move(op_a), move(op_b)); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: { - const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); + Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, move(value)); + break; + } + case OpCode::Id::SHF_RIGHT_R: + case OpCode::Id::SHF_RIGHT_IMM: + case OpCode::Id::SHF_LEFT_R: + case OpCode::Id::SHF_LEFT_IMM: { + UNIMPLEMENTED_IF(instr.generates_cc); + UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", + static_cast<int>(instr.shf.xmode.Value())); + + if (instr.is_b_imm) { + op_b = Immediate(static_cast<u32>(instr.shf.immediate)); + } + const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; + Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); + + Node negated_shift = Operation(OperationCode::INegate, shift); + Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); + + const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; + Node value = (is_right ? ShiftRight : ShiftLeft)( + move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); + + SetRegister(bb, instr.gpr0, move(value)); break; } default: diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index cd984f763..bee7d8cad 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, + {}, {}, component, element, {}}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -161,16 +162,16 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case OpCode::Id::TXD: { UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented"); + const bool is_array = instr.txd.is_array != 0; u64 base_reg = instr.gpr8.Value(); const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - - const Sampler* sampler = is_bindless - ? GetBindlessSampler(base_reg, {{texture_type, false, false}}) - : GetSampler(instr.sampler, {{texture_type, false, false}}); + Node index_var{}; + const Sampler* sampler = + is_bindless ? 
GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -179,6 +180,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexInstructionFloat(bb, instr, values); break; } + if (is_bindless) { base_reg++; } @@ -192,8 +194,15 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { derivates.push_back(GetRegister(derivate_reg + derivate + 1)); } + Node array_node = {}; + if (is_array) { + const Node info_reg = GetRegister(base_reg + coord_count); + array_node = BitfieldExtract(info_reg, 0, 16); + } + for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, + {}, {}, {}, element, index_var}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -208,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -233,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -259,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + is_bindless ? 
GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -302,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -376,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, - info.is_buffer); + info.is_buffer, false); } -const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, buffer, offset] = - TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_sampler != nullptr); - if (base_sampler == nullptr) { + const auto [base_node, tracked_sampler_info] = + TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); + ASSERT(base_node != nullptr); + if (base_node == nullptr) { return nullptr; } - const auto info = GetSamplerInfo(sampler_info, offset, buffer); + if (const auto bindless_sampler_info = + std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { + const u32 buffer = bindless_sampler_info->GetIndex(); + const u32 offset = bindless_sampler_info->GetOffset(); + const auto info = GetSamplerInfo(sampler_info, offset, buffer); + + // If this sampler has already been used, return the existing mapping. + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); + return &*it; + } - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { - return entry.GetBuffer() == buffer && entry.GetOffset() == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && - it->IsShadow() == info.is_shadow); - return &*it; - } + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer, false); + } else if (const auto array_sampler_info = + std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { + const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; + index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); + const auto info = GetSamplerInfo(sampler_info, base_offset); + + // If this sampler has already been used, return the existing mapping. 
+ const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), + [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && + it->IsBuffer() == info.is_buffer && it->IsIndexed()); + return &*it; + } - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + uses_indexed_samplers = true; + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, + info.is_shadow, info.is_buffer, true); + } + return nullptr; } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -483,66 +522,53 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const auto is_array = static_cast<bool>(array); - const auto is_shadow = static_cast<bool>(depth_compare); + const bool is_array = array != nullptr; + const bool is_shadow = depth_compare != nullptr; const bool is_bindless = bindless_reg.has_value(); - UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || - (texture_type == TextureType::TextureCube && is_array && is_shadow), - "This method is not supported."); + UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow); + ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, + "Illegal texture type"); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - const Sampler* sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); - Node4 values; - if (sampler == nullptr) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; + Node index_var; + const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) + : GetSampler(instr.sampler, info); + if (!sampler) { + return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; } const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. - const bool gl_lod_supported = - !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || - (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); - - const OperationCode read_method = - (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture; - - UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); + const OperationCode opcode = lod_needed ? 
OperationCode::TextureLod : OperationCode::Texture; Node bias; Node lod; - if (process_mode != TextureProcessMode::None && gl_lod_supported) { - switch (process_mode) { - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register - // indexed by the gpr20 field with an offset depending on the - // usage of the other registers - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); - break; - } + switch (process_mode) { + case TextureProcessMode::None: + break; + case TextureProcessMode::LZ: + lod = Immediate(0.0f); + break; + case TextureProcessMode::LB: + // If present, lod or bias are always stored in the register indexed by the gpr20 field with + // an offset depending on the usage of the other registers. + bias = GetRegister(instr.gpr20.Value() + bias_offset); + break; + case TextureProcessMode::LL: + lod = GetRegister(instr.gpr20.Value() + bias_offset); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); + break; } + Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto copy_coords = coords; - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; - values[element] = Operation(read_method, meta, std::move(copy_coords)); + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, + lod, {}, element, index_var}; + values[element] = Operation(opcode, meta, coords); } return values; @@ -589,7 +615,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); } - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -625,7 +651,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -656,7 +682,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); const SamplerInfo info{texture_type, is_array, depth_compare, false}; - const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + Node index_var{}; + const Sampler* sampler = is_bindless ? 
GetBindlessSampler(parameter_register++, index_var, info) : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { @@ -685,7 +712,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, + index_var}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -718,7 +746,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -768,7 +796,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..a0a7b9111 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,7 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - UAtomicAdd, /// (smem, uint) -> uint + AtomicAdd, /// (memory, {u}int) -> {u}int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -212,6 +212,7 @@ enum class MetaStackClass { class OperationNode; class ConditionalNode; class GprNode; +class CustomVarNode; class ImmediateNode; class InternalFlagNode; class PredicateNode; @@ -223,26 +224,32 @@ class SmemNode; class GmemNode; class CommentNode; -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, +using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>; using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; +class BindlessSamplerNode; +class ArraySamplerNode; + +using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSampler = std::shared_ptr<TrackSamplerData>; + class Sampler { public: /// This constructor is for bound samplers constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_buffer{is_buffer} {} + is_buffer{is_buffer}, is_indexed{is_indexed} {} /// This constructor is for bindless samplers constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, 
bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} + is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} constexpr u32 GetIndex() const { return index; @@ -276,16 +283,72 @@ public: return is_bindless; } + constexpr bool IsIndexed() const { + return is_indexed; + } + + constexpr u32 Size() const { + return size; + } + + constexpr void SetSize(u32 new_size) { + size = new_size; + } + private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + u32 size{}; ///< Size of the sampler if indexed. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class ArraySamplerNode final { +public: + explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) + : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetBaseOffset() const { + return base_offset; + } + + constexpr u32 GetIndexVar() const { + return bindless_var; + } + +private: + u32 index; + u32 base_offset; + u32 bindless_var; +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class BindlessSamplerNode final { +public: + explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { + return offset; + } + +private: + u32 index; + u32 offset; }; class Image final { @@ -380,8 +443,9 @@ struct MetaTexture { std::vector<Node> derivates; Node bias; Node lod; - Node component{}; + Node component; u32 element{}; + Node index; }; struct MetaImage { @@ -488,6 +552,19 @@ private: Tegra::Shader::Register index{}; }; +/// A custom variable +class CustomVarNode final { +public: + explicit constexpr CustomVarNode(u32 index) : index{index} {} + + constexpr u32 GetIndex() const { + return index; + } + +private: + u32 index{}; +}; + /// A 32-bits value that represents an immediate value class ImmediateNode final { public: diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); } +template <typename T, typename... Args> +TrackSampler MakeTrackSampler(Args&&... args) { + static_assert(std::is_convertible_v<T, TrackSamplerData>); + return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); +} + template <typename... Args> Node Operation(OperationCode code, Args&&... 
args) { if constexpr (sizeof...(args) == 0) { diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); + PostDecode(); } ShaderIR::~ShaderIR() = default; @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { return MakeNode<GprNode>(reg); } +Node ShaderIR::GetCustomVariable(u32 id) { + return MakeNode<CustomVarNode>(id); +} + Node ShaderIR::GetImmediate19(Instruction instr) { return Immediate(instr.alu.GetImm20_19()); } @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { return id; } +u32 ShaderIR::NewCustomVariable() { + return num_custom_variables++; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,6 +180,10 @@ public: return amend_code[index]; } + u32 GetNumCustomVariables() const { + return num_custom_variables; + } + private: friend class ASTDecoder; @@ -191,6 +195,7 @@ private: }; void Decode(); + void PostDecode(); NodeBlock DecodeRange(u32 begin, u32 end); void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); @@ -235,6 +240,8 @@ private: /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); + /// Generates a node for a custom variable + Node GetCustomVariable(u32 id); /// Generates a node representing a 19-bit immediate value Node GetImmediate19(Tegra::Shader::Instruction instr); /// Generates a node representing a 32-bit immediate value @@ -321,7 +328,7 @@ private: std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses a texture sampler for a bindless texture. - const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, + const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses an image. @@ -387,6 +394,9 @@ private: std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; + std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, @@ -399,6 +409,8 @@ private: /// Register new amending code and obtain the reference id. 
std::size_t DeclareAmend(Node new_amend); + u32 NewCustomVariable(); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -414,6 +426,7 @@ private: NodeBlock global_code; ASTManager program_manager{true, true}; std::vector<Node> amend_code; + u32 num_custom_variables{}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; @@ -431,6 +444,7 @@ private: bool uses_instance_id{}; bool uses_vertex_id{}; bool uses_warps{}; + bool uses_indexed_samplers{}; Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..face8c943 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "video_core/shader/node.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } return {}; } + +std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { + if (operation.GetCode() != OperationCode::UAdd) { + return std::nullopt; + } + Node gpr; + Node offset; + ASSERT(operation.GetOperandsCount() == 2); + for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { + Node operand = operation[i]; + if (std::holds_alternative<ImmediateNode>(*operand)) { + offset = operation[i]; + } else if (std::holds_alternative<GprNode>(*operand)) { + gpr = operation[i]; + } + } + if (offset && gpr) { + return std::make_pair(gpr, offset); + } + return std::nullopt; +} + +bool AmendNodeCv(std::size_t amend_index, Node node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { + operation->SetAmendIndex(amend_index); + return true; + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + conditional->SetAmendIndex(amend_index); + return true; + } + return false; +} + } // Anonymous namespace +std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { + // Constant buffer found, test if it's an immediate + const auto offset = cbuf->GetOffset(); + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + auto track = + MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); + return {tracked, track}; + } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + auto bound_buffer = locker.ObtainBoundBuffer(); + if (!bound_buffer) { + return {}; + } + if (*bound_buffer != cbuf->GetIndex()) { + return {}; + } + auto pair = DecoupleIndirectRead(*operation); + if (!pair) { + return {}; + } + auto [gpr, base_offset] = *pair; + const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); + auto gpu_driver = locker.AccessGuestDriverProfile(); + if (gpu_driver == nullptr) { + return {}; + } + const u32 bindless_cv = NewCustomVariable(); + const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, + Immediate(gpu_driver->GetTextureHandlerSize())); + + const Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); + const std::size_t amend_index = DeclareAmend(amend_op); + AmendNodeCv(amend_index, code[cursor]); + // TODO Implement Bindless Index custom variable + auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), + offset_inm->GetValue(), bindless_cv); + 
return {tracked, track}; + } + return {}; + } + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { + if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { + return {}; + } + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); + if (!source) { + return {}; + } + return TrackBindlessSampler(source, code, new_cursor); + } + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); + std::get<0>(found)) { + // Cbuf found in operand. + return found; + } + } + return {}; + } + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { + const auto& conditional_code = conditional->GetCode(); + return TrackBindlessSampler(tracked, conditional_code, + static_cast<s64>(conditional_code.size())); + } + return {}; +} + std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 829268b4c..84469b7ba 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -135,7 +135,7 @@ std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& i for (u32 level = 0; level < mipmaps; level++) { const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); + result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); } } return result; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f4c015635..0d105d386 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -721,7 +721,6 @@ private: std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
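The texture_cache.h hunk that follows reworks the fast structural-match path in GetSurface: the old condition only rejected the case where the requested target was Texture3D and did not match, while the new code also inspects the cached surface's target, so the fast path is taken only when neither side is a 3D target or the targets match exactly. A minimal sketch of the resulting decision, using stand-in types; FastPath, Action and targets_match are illustrative names and are not part of the patch:

#include <optional>

// Sketch only: simplified stand-ins for the cache's types.
enum class MatchStructureResult { None, SemiMatch, FullMatch };
enum class SurfaceTarget { Texture2D, Texture3D };
enum class Action { ManageStructuralMatch, RebuildSurface };

struct SurfaceParams {
    SurfaceTarget target;
};

// Returns the fast-path action when a structural match may be reused, or
// std::nullopt when GetSurface has to fall through to the slower overlap
// handling. The 3D restriction now applies when either side is a 3D target.
std::optional<Action> FastPath(MatchStructureResult struct_result, const SurfaceParams& params,
                               const SurfaceParams& old_params, bool targets_match) {
    if (struct_result == MatchStructureResult::None) {
        return std::nullopt;
    }
    const bool not_3d = params.target != SurfaceTarget::Texture3D &&
                        old_params.target != SurfaceTarget::Texture3D;
    if (!not_3d && !targets_match) {
        return std::nullopt;
    }
    return struct_result == MatchStructureResult::FullMatch ? Action::ManageStructuralMatch
                                                            : Action::RebuildSurface;
}

In the actual hunk below the same check is written inline, with old_params fetched via current_surface->GetSurfaceParams() and targets_match corresponding to current_surface->MatchTarget(params.target).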
@@ -733,14 +732,18 @@ private: return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } + const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (struct_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params, is_render); - } else { - return RebuildSurface(current_surface, params, is_render); + if (struct_result != MatchStructureResult::None) { + const auto& old_params = current_surface->GetSurfaceParams(); + const bool not_3d = params.target != SurfaceTarget::Texture3D && + old_params.target != SurfaceTarget::Texture3D; + if (not_3d || current_surface->MatchTarget(params.target)) { + if (struct_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params, is_render); + } else { + return RebuildSurface(current_surface, params, is_render); + } } } } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 8e947394c..a5f81a8a0 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -3,19 +3,32 @@ // Refer to the license.txt file included. #include <memory> +#include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" #include "video_core/gpu_asynch.h" #include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#ifdef HAS_VULKAN +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#endif #include "video_core/video_core.h" namespace VideoCore { std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); + switch (Settings::values.renderer_backend) { + case Settings::RendererBackend::OpenGL: + return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); +#ifdef HAS_VULKAN + case Settings::RendererBackend::Vulkan: + return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); +#endif + default: + return nullptr; + } } std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { diff --git a/src/web_service/telemetry_json.cpp b/src/web_service/telemetry_json.cpp index 9156ce802..7538389bf 100644 --- a/src/web_service/telemetry_json.cpp +++ b/src/web_service/telemetry_json.cpp @@ -117,6 +117,7 @@ bool TelemetryJson::SubmitTestcase() { impl->SerializeSection(Telemetry::FieldType::Session, "Session"); impl->SerializeSection(Telemetry::FieldType::UserFeedback, "UserFeedback"); impl->SerializeSection(Telemetry::FieldType::UserSystem, "UserSystem"); + impl->SerializeSection(Telemetry::FieldType::UserConfig, "UserConfig"); auto content = impl->TopSection().dump(); Client client(impl->host, impl->username, impl->token); diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 6683f459f..737ffe409 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -73,14 +73,12 @@ struct Client::Impl { if (!parsedUrl.GetPort(&port)) { port = HTTP_PORT; } - cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port, - TIMEOUT_SECONDS); + cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port); } else if (parsedUrl.m_Scheme == "https") { if (!parsedUrl.GetPort(&port)) { port = HTTPS_PORT; } - cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port, - 
TIMEOUT_SECONDS); + cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port); } else { LOG_ERROR(WebService, "Bad URL scheme {}", parsedUrl.m_Scheme); return Common::WebResult{Common::WebResult::Code::InvalidURL, "Bad URL scheme"}; @@ -90,6 +88,7 @@ struct Client::Impl { LOG_ERROR(WebService, "Invalid URL {}", host + path); return Common::WebResult{Common::WebResult::Code::InvalidURL, "Invalid URL"}; } + cli->set_timeout_sec(TIMEOUT_SECONDS); httplib::Headers params; if (!jwt.empty()) { diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 11ae1e66e..b841e63fa 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -36,9 +36,6 @@ add_executable(yuzu configuration/configure_filesystem.cpp configuration/configure_filesystem.h configuration/configure_filesystem.ui - configuration/configure_gamelist.cpp - configuration/configure_gamelist.h - configuration/configure_gamelist.ui configuration/configure_general.cpp configuration/configure_general.h configuration/configure_general.ui @@ -75,6 +72,9 @@ add_executable(yuzu configuration/configure_touchscreen_advanced.cpp configuration/configure_touchscreen_advanced.h configuration/configure_touchscreen_advanced.ui + configuration/configure_ui.cpp + configuration/configure_ui.h + configuration/configure_ui.ui configuration/configure_web.cpp configuration/configure_web.h configuration/configure_web.ui @@ -200,3 +200,8 @@ if (MSVC) copy_yuzu_SDL_deps(yuzu) copy_yuzu_unicorn_deps(yuzu) endif() + +if (ENABLE_VULKAN) + target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) + target_compile_definitions(yuzu PRIVATE HAS_VULKAN) +endif() diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 7490fb718..55a37fffa 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -2,19 +2,30 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <glad/glad.h> + #include <QApplication> #include <QHBoxLayout> #include <QKeyEvent> +#include <QMessageBox> #include <QOffscreenSurface> #include <QOpenGLWindow> #include <QPainter> #include <QScreen> +#include <QStringList> #include <QWindow> +#ifdef HAS_VULKAN +#include <QVulkanWindow> +#endif + #include <fmt/format.h> + +#include "common/assert.h" #include "common/microprofile.h" #include "common/scm_rev.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" +#include "core/frontend/scope_acquire_window_context.h" #include "core/settings.h" #include "input_common/keyboard.h" #include "input_common/main.h" @@ -114,19 +125,10 @@ private: QOpenGLContext context; }; -// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL -// context. 
-// The corresponding functionality is handled in EmuThread instead -class GGLWidgetInternal : public QOpenGLWindow { +class GWidgetInternal : public QWindow { public: - GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context) - : QOpenGLWindow(shared_context), parent(parent) {} - - void paintEvent(QPaintEvent* ev) override { - if (do_painting) { - QPainter painter(this); - } - } + GWidgetInternal(GRenderWindow* parent) : parent(parent) {} + virtual ~GWidgetInternal() = default; void resizeEvent(QResizeEvent* ev) override { parent->OnClientAreaResized(ev->size().width(), ev->size().height()); @@ -182,11 +184,47 @@ public: do_painting = true; } + std::pair<unsigned, unsigned> GetSize() const { + return std::make_pair(width(), height()); + } + +protected: + bool IsPaintingEnabled() const { + return do_painting; + } + private: GRenderWindow* parent; - bool do_painting; + bool do_painting = false; +}; + +// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL +// context. +// The corresponding functionality is handled in EmuThread instead +class GGLWidgetInternal final : public GWidgetInternal, public QOpenGLWindow { +public: + GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context) + : GWidgetInternal(parent), QOpenGLWindow(shared_context) {} + ~GGLWidgetInternal() override = default; + + void paintEvent(QPaintEvent* ev) override { + if (IsPaintingEnabled()) { + QPainter painter(this); + } + } }; +#ifdef HAS_VULKAN +class GVKWidgetInternal final : public GWidgetInternal { +public: + GVKWidgetInternal(GRenderWindow* parent, QVulkanInstance* instance) : GWidgetInternal(parent) { + setSurfaceType(QSurface::SurfaceType::VulkanSurface); + setVulkanInstance(instance); + } + ~GVKWidgetInternal() override = default; +}; +#endif + GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) : QWidget(parent), emu_thread(emu_thread) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") @@ -201,9 +239,15 @@ GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) GRenderWindow::~GRenderWindow() { InputCommon::Shutdown(); + + // Avoid an unordered destruction that generates a segfault + delete child; } void GRenderWindow::moveContext() { + if (!context) { + return; + } DoneCurrent(); // If the thread started running, move the GL Context to the new thread. 
Otherwise, move it @@ -215,8 +259,9 @@ void GRenderWindow::moveContext() { } void GRenderWindow::SwapBuffers() { - context->swapBuffers(child); - + if (context) { + context->swapBuffers(child); + } if (!first_frame) { first_frame = true; emit FirstFrameDisplayed(); @@ -224,15 +269,38 @@ void GRenderWindow::SwapBuffers() { } void GRenderWindow::MakeCurrent() { - context->makeCurrent(child); + if (context) { + context->makeCurrent(child); + } } void GRenderWindow::DoneCurrent() { - context->doneCurrent(); + if (context) { + context->doneCurrent(); + } } void GRenderWindow::PollEvents() {} +bool GRenderWindow::IsShown() const { + return !isMinimized(); +} + +void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const { +#ifdef HAS_VULKAN + const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); + const VkInstance instance_copy = vk_instance->vkInstance(); + const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child); + + std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); + std::memcpy(instance, &instance_copy, sizeof(instance_copy)); + std::memcpy(surface, &surface_copy, sizeof(surface_copy)); +#else + UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan"); +#endif +} + // On Qt 5.0+, this correctly gets the size of the framebuffer (pixels). // // Older versions get the window size (density independent pixels), @@ -241,10 +309,9 @@ void GRenderWindow::PollEvents() {} void GRenderWindow::OnFramebufferSizeChanged() { // Screen changes potentially incur a change in screen DPI, hence we should update the // framebuffer size - const qreal pixel_ratio = GetWindowPixelRatio(); - const u32 width = child->QPaintDevice::width() * pixel_ratio; - const u32 height = child->QPaintDevice::height() * pixel_ratio; - UpdateCurrentFramebufferLayout(width, height); + const qreal pixelRatio{GetWindowPixelRatio()}; + const auto size{child->GetSize()}; + UpdateCurrentFramebufferLayout(size.first * pixelRatio, size.second * pixelRatio); } void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) { @@ -290,7 +357,7 @@ qreal GRenderWindow::GetWindowPixelRatio() const { } std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const { - const qreal pixel_ratio = GetWindowPixelRatio(); + const qreal pixel_ratio{GetWindowPixelRatio()}; return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; } @@ -356,50 +423,46 @@ std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedCont return std::make_unique<GGLContext>(context.get()); } -void GRenderWindow::InitRenderTarget() { +bool GRenderWindow::InitRenderTarget() { shared_context.reset(); context.reset(); - - delete child; - child = nullptr; - - delete container; - container = nullptr; - - delete layout(); + if (child) { + delete child; + } + if (container) { + delete container; + } + if (layout()) { + delete layout(); + } first_frame = false; - // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, - // WA_DontShowOnScreen, WA_DeleteOnClose - QSurfaceFormat fmt; - fmt.setVersion(4, 3); - fmt.setProfile(QSurfaceFormat::CompatibilityProfile); - fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); - // TODO: expose a setting for buffer value (ie default/single/double/triple) - fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); - shared_context = 
std::make_unique<QOpenGLContext>(); - shared_context->setFormat(fmt); - shared_context->create(); - context = std::make_unique<QOpenGLContext>(); - context->setShareContext(shared_context.get()); - context->setFormat(fmt); - context->create(); - fmt.setSwapInterval(0); + switch (Settings::values.renderer_backend) { + case Settings::RendererBackend::OpenGL: + if (!InitializeOpenGL()) { + return false; + } + break; + case Settings::RendererBackend::Vulkan: + if (!InitializeVulkan()) { + return false; + } + break; + } - child = new GGLWidgetInternal(this, shared_context.get()); container = QWidget::createWindowContainer(child, this); - QBoxLayout* layout = new QHBoxLayout(this); + layout->addWidget(container); layout->setMargin(0); setLayout(layout); - // Reset minimum size to avoid unwanted resizes when this function is called for a second time. + // Reset minimum required size to avoid resizing issues on the main window after restarting. setMinimumSize(1, 1); - // Show causes the window to actually be created and the OpenGL context as well, but we don't - // want the widget to be shown yet, so immediately hide it. + // Show causes the window to actually be created and the gl context as well, but we don't want + // the widget to be shown yet, so immediately hide it. show(); hide(); @@ -410,9 +473,17 @@ void GRenderWindow::InitRenderTarget() { OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); OnFramebufferSizeChanged(); - NotifyClientAreaSizeChanged(std::pair<unsigned, unsigned>(child->width(), child->height())); + NotifyClientAreaSizeChanged(child->GetSize()); BackupGeometry(); + + if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { + if (!LoadOpenGL()) { + return false; + } + } + + return true; } void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { @@ -441,6 +512,113 @@ void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal setMinimumSize(minimal_size.first, minimal_size.second); } +bool GRenderWindow::InitializeOpenGL() { + // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, + // WA_DontShowOnScreen, WA_DeleteOnClose + QSurfaceFormat fmt; + fmt.setVersion(4, 3); + fmt.setProfile(QSurfaceFormat::CompatibilityProfile); + fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); + // TODO: expose a setting for buffer value (ie default/single/double/triple) + fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); + shared_context = std::make_unique<QOpenGLContext>(); + shared_context->setFormat(fmt); + shared_context->create(); + context = std::make_unique<QOpenGLContext>(); + context->setShareContext(shared_context.get()); + context->setFormat(fmt); + context->create(); + fmt.setSwapInterval(false); + + child = new GGLWidgetInternal(this, shared_context.get()); + return true; +} + +bool GRenderWindow::InitializeVulkan() { +#ifdef HAS_VULKAN + vk_instance = std::make_unique<QVulkanInstance>(); + vk_instance->setApiVersion(QVersionNumber(1, 1, 0)); + vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect); + if (Settings::values.renderer_debug) { + const auto supported_layers{vk_instance->supportedLayers()}; + const bool found = + std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) { + constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation"; + return layer.name == searched_layer; + }); + if (found) { + vk_instance->setLayers(QByteArrayList() << 
"VK_LAYER_LUNARG_standard_validation"); + vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + } + if (!vk_instance->create()) { + QMessageBox::critical( + this, tr("Error while initializing Vulkan 1.1!"), + tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the " + "latest graphics drivers.")); + return false; + } + + child = new GVKWidgetInternal(this, vk_instance.get()); + return true; +#else + QMessageBox::critical(this, tr("Vulkan not available!"), + tr("yuzu has not been compiled with Vulkan support.")); + return false; +#endif +} + +bool GRenderWindow::LoadOpenGL() { + Core::Frontend::ScopeAcquireWindowContext acquire_context{*this}; + if (!gladLoadGL()) { + QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"), + tr("Your GPU may not support OpenGL 4.3, or you do not have the " + "latest graphics driver.")); + return false; + } + + QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions(); + if (!unsupported_gl_extensions.empty()) { + QMessageBox::critical( + this, tr("Error while initializing OpenGL!"), + tr("Your GPU may not support one or more required OpenGL extensions. Please ensure you " + "have the latest graphics driver.<br><br>Unsupported extensions:<br>") + + unsupported_gl_extensions.join(QStringLiteral("<br>"))); + return false; + } + return true; +} + +QStringList GRenderWindow::GetUnsupportedGLExtensions() const { + QStringList unsupported_ext; + + if (!GLAD_GL_ARB_buffer_storage) + unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); + if (!GLAD_GL_ARB_direct_state_access) + unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); + if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) + unsupported_ext.append(QStringLiteral("ARB_vertex_type_10f_11f_11f_rev")); + if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) + unsupported_ext.append(QStringLiteral("ARB_texture_mirror_clamp_to_edge")); + if (!GLAD_GL_ARB_multi_bind) + unsupported_ext.append(QStringLiteral("ARB_multi_bind")); + if (!GLAD_GL_ARB_clip_control) + unsupported_ext.append(QStringLiteral("ARB_clip_control")); + + // Extensions required to support some texture formats. 
+ if (!GLAD_GL_EXT_texture_compression_s3tc) + unsupported_ext.append(QStringLiteral("EXT_texture_compression_s3tc")); + if (!GLAD_GL_ARB_texture_compression_rgtc) + unsupported_ext.append(QStringLiteral("ARB_texture_compression_rgtc")); + if (!GLAD_GL_ARB_depth_buffer_float) + unsupported_ext.append(QStringLiteral("ARB_depth_buffer_float")); + + for (const QString& ext : unsupported_ext) + LOG_CRITICAL(Frontend, "Unsupported GL extension: {}", ext.toStdString()); + + return unsupported_ext; +} + void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) { this->emu_thread = emu_thread; child->DisablePainting(); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 2fc64895f..71a2fa321 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -7,17 +7,28 @@ #include <atomic> #include <condition_variable> #include <mutex> + #include <QImage> #include <QThread> #include <QWidget> + +#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" class QKeyEvent; class QScreen; class QTouchEvent; +class QStringList; +class QSurface; +class QOpenGLContext; +#ifdef HAS_VULKAN +class QVulkanInstance; +#endif +class GWidgetInternal; class GGLWidgetInternal; +class GVKWidgetInternal; class GMainWindow; class GRenderWindow; class QSurface; @@ -123,6 +134,9 @@ public: void MakeCurrent() override; void DoneCurrent() override; void PollEvents() override; + bool IsShown() const override; + void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; void ForwardKeyPressEvent(QKeyEvent* event); @@ -142,7 +156,7 @@ public: void OnClientAreaResized(u32 width, u32 height); - void InitRenderTarget(); + bool InitRenderTarget(); void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); @@ -165,10 +179,13 @@ private: void OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal_size) override; - QWidget* container = nullptr; - GGLWidgetInternal* child = nullptr; + bool InitializeOpenGL(); + bool InitializeVulkan(); + bool LoadOpenGL(); + QStringList GetUnsupportedGLExtensions() const; - QByteArray geometry; + QWidget* container = nullptr; + GWidgetInternal* child = nullptr; EmuThread* emu_thread; // Context that backs the GGLWidgetInternal (and will be used by core to render) @@ -177,9 +194,14 @@ private: // current std::unique_ptr<QOpenGLContext> shared_context; +#ifdef HAS_VULKAN + std::unique_ptr<QVulkanInstance> vk_instance; +#endif + /// Temporary storage of the screenshot taken QImage screenshot_image; + QByteArray geometry; bool first_frame = false; protected: diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index f92a4b3c3..6209fff75 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -10,6 +10,7 @@ #include "core/hle/service/acc/profile_manager.h" #include "core/hle/service/hid/controllers/npad.h" #include "input_common/main.h" +#include "input_common/udp/client.h" #include "yuzu/configuration/config.h" #include "yuzu/uisettings.h" @@ -429,6 +430,16 @@ void Config::ReadControlValues() { QStringLiteral("engine:motion_emu,update_period:100,sensitivity:0.01")) .toString() .toStdString(); + Settings::values.udp_input_address = + ReadSetting(QStringLiteral("udp_input_address"), + QString::fromUtf8(InputCommon::CemuhookUDP::DEFAULT_ADDR)) + .toString() + .toStdString(); + Settings::values.udp_input_port = static_cast<u16>( + 
ReadSetting(QStringLiteral("udp_input_port"), InputCommon::CemuhookUDP::DEFAULT_PORT) + .toInt()); + Settings::values.udp_pad_index = + static_cast<u8>(ReadSetting(QStringLiteral("udp_pad_index"), 0).toUInt()); qt_config->endGroup(); } @@ -613,8 +624,13 @@ void Config::ReadPathValues() { void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); + Settings::values.renderer_backend = + static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); + Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); + Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); Settings::values.resolution_factor = ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); + Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); Settings::values.use_frame_limit = ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); @@ -727,7 +743,6 @@ void Config::ReadUIValues() { void Config::ReadUIGamelistValues() { qt_config->beginGroup(QStringLiteral("UIGameList")); - UISettings::values.show_unknown = ReadSetting(QStringLiteral("show_unknown"), true).toBool(); UISettings::values.show_add_ons = ReadSetting(QStringLiteral("show_add_ons"), true).toBool(); UISettings::values.icon_size = ReadSetting(QStringLiteral("icon_size"), 64).toUInt(); UISettings::values.row_1_text_id = ReadSetting(QStringLiteral("row_1_text_id"), 3).toUInt(); @@ -911,6 +926,12 @@ void Config::SaveControlValues() { QString::fromStdString(Settings::values.motion_device), QStringLiteral("engine:motion_emu,update_period:100,sensitivity:0.01")); WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false); + WriteSetting(QStringLiteral("udp_input_address"), + QString::fromStdString(Settings::values.udp_input_address), + QString::fromUtf8(InputCommon::CemuhookUDP::DEFAULT_ADDR)); + WriteSetting(QStringLiteral("udp_input_port"), Settings::values.udp_input_port, + InputCommon::CemuhookUDP::DEFAULT_PORT); + WriteSetting(QStringLiteral("udp_pad_index"), Settings::values.udp_pad_index, 0); qt_config->endGroup(); } @@ -1039,8 +1060,12 @@ void Config::SavePathValues() { void Config::SaveRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); + WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); + WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); + WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); WriteSetting(QStringLiteral("resolution_factor"), static_cast<double>(Settings::values.resolution_factor), 1.0); + WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, @@ -1135,7 +1160,6 @@ void Config::SaveUIValues() { void Config::SaveUIGamelistValues() { qt_config->beginGroup(QStringLiteral("UIGameList")); - WriteSetting(QStringLiteral("show_unknown"), UISettings::values.show_unknown, true); WriteSetting(QStringLiteral("show_add_ons"), UISettings::values.show_add_ons, true); WriteSetting(QStringLiteral("icon_size"), UISettings::values.icon_size, 64); 
WriteSetting(QStringLiteral("row_1_text_id"), UISettings::values.row_1_text_id, 3); diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui index 372427ae2..67b990f1a 100644 --- a/src/yuzu/configuration/configure.ui +++ b/src/yuzu/configuration/configure.ui @@ -48,7 +48,7 @@ <string>General</string> </attribute> </widget> - <widget class="ConfigureGameList" name="gameListTab"> + <widget class="ConfigureUi" name="uiTab"> <attribute name="title"> <string>Game List</string> </attribute> @@ -166,9 +166,9 @@ <container>1</container> </customwidget> <customwidget> - <class>ConfigureGameList</class> + <class>ConfigureUi</class> <extends>QWidget</extends> - <header>configuration/configure_gamelist.h</header> + <header>configuration/configure_ui.h</header> <container>1</container> </customwidget> <customwidget> diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 90c1f9459..9631059c7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -36,6 +36,8 @@ void ConfigureDebug::SetConfiguration() { ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); ui->reporting_services->setChecked(Settings::values.reporting_services); ui->quest_flag->setChecked(Settings::values.quest_flag); + ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); } void ConfigureDebug::ApplyConfiguration() { @@ -46,6 +48,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); Settings::values.reporting_services = ui->reporting_services->isChecked(); Settings::values.quest_flag = ui->quest_flag->isChecked(); + Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Debugger::ToggleConsole(); Log::Filter filter; filter.ParseFilterString(Settings::values.log_filter); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index ce49569bb..e028c4c80 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -7,7 +7,7 @@ <x>0</x> <y>0</y> <width>400</width> - <height>474</height> + <height>467</height> </rect> </property> <property name="windowTitle"> @@ -103,6 +103,80 @@ </item> </layout> </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_3"> + <property name="title"> + <string>Homebrew</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_5"> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_4"> + <item> + <widget class="QLabel" name="label_3"> + <property name="text"> + <string>Arguments String</string> + </property> + </widget> + </item> + <item> + <widget class="QLineEdit" name="homebrew_args_edit"/> + </item> + </layout> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_4"> + <property name="title"> + <string>Graphics</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_6"> + <item> + <widget class="QCheckBox" name="enable_graphics_debugging"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="whatsThis"> + <string>When checked, the graphics API enters in a slower debugging mode</string> + </property> + <property name="text"> + <string>Enable Graphics Debugging</string> + </property> + </widget> + </item> + 
</layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_5"> + <property name="title"> + <string>Dump</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_6"> + <item> + <widget class="QCheckBox" name="dump_decompressed_nso"> + <property name="whatsThis"> + <string>When checked, any NSO yuzu tries to load or patch will be copied decompressed to the yuzu/dump directory.</string> + </property> + <property name="text"> + <string>Dump Decompressed NSOs</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="dump_exefs"> + <property name="whatsThis"> + <string>When checked, any game that yuzu loads will have its ExeFS dumped to the yuzu/dump directory.</string> + </property> + <property name="text"> + <string>Dump ExeFS</string> + </property> + </widget> + </item> <item> <widget class="QCheckBox" name="reporting_services"> <property name="text"> @@ -129,11 +203,11 @@ </widget> </item> <item> - <widget class="QGroupBox" name="groupBox_5"> + <widget class="QGroupBox" name="groupBox_6"> <property name="title"> <string>Advanced</string> </property> - <layout class="QVBoxLayout" name="verticalLayout"> + <layout class="QVBoxLayout" name="verticalLayout_7"> <item> <widget class="QCheckBox" name="quest_flag"> <property name="text"> @@ -145,29 +219,6 @@ </widget> </item> <item> - <widget class="QGroupBox" name="groupBox_3"> - <property name="title"> - <string>Homebrew</string> - </property> - <layout class="QVBoxLayout" name="verticalLayout_5"> - <item> - <layout class="QHBoxLayout" name="horizontalLayout_4"> - <item> - <widget class="QLabel" name="label_3"> - <property name="text"> - <string>Arguments String</string> - </property> - </widget> - </item> - <item> - <widget class="QLineEdit" name="homebrew_args_edit"/> - </item> - </layout> - </item> - </layout> - </widget> - </item> - <item> <spacer name="verticalSpacer"> <property name="orientation"> <enum>Qt::Vertical</enum> @@ -185,6 +236,19 @@ </item> </layout> </widget> + <tabstops> + <tabstop>toggle_gdbstub</tabstop> + <tabstop>gdbport_spinbox</tabstop> + <tabstop>log_filter_edit</tabstop> + <tabstop>toggle_console</tabstop> + <tabstop>open_log_button</tabstop> + <tabstop>homebrew_args_edit</tabstop> + <tabstop>enable_graphics_debugging</tabstop> + <tabstop>dump_decompressed_nso</tabstop> + <tabstop>dump_exefs</tabstop> + <tabstop>reporting_services</tabstop> + <tabstop>quest_flag</tabstop> + </tabstops> <resources/> <connections> <connection> diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index 8497eaa14..db3b19352 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -34,7 +34,7 @@ void ConfigureDialog::SetConfiguration() {} void ConfigureDialog::ApplyConfiguration() { ui->generalTab->ApplyConfiguration(); - ui->gameListTab->ApplyConfiguration(); + ui->uiTab->ApplyConfiguration(); ui->systemTab->ApplyConfiguration(); ui->profileManagerTab->ApplyConfiguration(); ui->filesystemTab->applyConfiguration(); @@ -74,7 +74,7 @@ Q_DECLARE_METATYPE(QList<QWidget*>); void ConfigureDialog::PopulateSelectionList() { const std::array<std::pair<QString, QList<QWidget*>>, 5> items{ - {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->gameListTab}}, + {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}}, {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}}, {tr("Graphics"), {ui->graphicsTab}}, {tr("Audio"), 
{ui->audioTab}}, @@ -108,7 +108,7 @@ void ConfigureDialog::UpdateVisibleTabs() { {ui->audioTab, tr("Audio")}, {ui->debugTab, tr("Debug")}, {ui->webTab, tr("Web")}, - {ui->gameListTab, tr("Game List")}, + {ui->uiTab, tr("UI")}, {ui->filesystemTab, tr("Filesystem")}, {ui->serviceTab, tr("Services")}, }; diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index 34e1d7fea..5ef927114 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp @@ -15,11 +15,6 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent) ui->setupUi(this); - for (const auto& theme : UISettings::themes) { - ui->theme_combobox->addItem(QString::fromUtf8(theme.first), - QString::fromUtf8(theme.second)); - } - SetConfiguration(); connect(ui->toggle_frame_limit, &QCheckBox::toggled, ui->frame_limit, &QSpinBox::setEnabled); @@ -30,7 +25,6 @@ ConfigureGeneral::~ConfigureGeneral() = default; void ConfigureGeneral::SetConfiguration() { ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing); ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); - ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background); ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); @@ -41,8 +35,6 @@ void ConfigureGeneral::SetConfiguration() { void ConfigureGeneral::ApplyConfiguration() { UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); - UISettings::values.theme = - ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); UISettings::values.pause_when_in_background = ui->toggle_background_pause->isChecked(); Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index 26b3486ff..857119bb3 100644 --- a/src/yuzu/configuration/configure_general.ui +++ b/src/yuzu/configuration/configure_general.ui @@ -65,39 +65,12 @@ </property> </widget> </item> - <item> - <widget class="QCheckBox" name="toggle_background_pause"> - <property name="text"> - <string>Pause emulation when in background</string> - </property> - </widget> - </item> - </layout> - </item> - </layout> - </widget> - </item> - <item> - <widget class="QGroupBox" name="theme_group_box"> - <property name="title"> - <string>Theme</string> - </property> - <layout class="QHBoxLayout" name="theme_qhbox_layout"> - <item> - <layout class="QVBoxLayout" name="theme_qvbox_layout"> <item> - <layout class="QHBoxLayout" name="theme_qhbox_layout_2"> - <item> - <widget class="QLabel" name="theme_label"> - <property name="text"> - <string>Theme:</string> - </property> - </widget> - </item> - <item> - <widget class="QComboBox" name="theme_combobox"/> - </item> - </layout> + <widget class="QCheckBox" name="toggle_background_pause"> + <property name="text"> + <string>Pause emulation when in background</string> + </property> + </widget> </item> </layout> </item> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 2c9e322c9..ea899c080 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -3,6 +3,13 @@ // Refer to the license.txt file included. 
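Note: the configure_graphics.cpp hunks that follow treat the index of the new "API" combo box as the Settings::RendererBackend value itself. A minimal sketch of that round trip, with the helper names being illustrative rather than part of the change:

#include <QComboBox>
#include "core/settings.h"

// Write the stored backend into the combo box (index 0 = OpenGL, 1 = Vulkan, as in the .ui file).
void LoadBackendIntoCombo(QComboBox* api) {
    api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
}

// Read the selection back as the enum; this mirrors GetCurrentGraphicsBackend() below.
Settings::RendererBackend BackendFromCombo(const QComboBox* api) {
    return static_cast<Settings::RendererBackend>(api->currentIndex());
}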
#include <QColorDialog> +#include <QComboBox> +#ifdef HAS_VULKAN +#include <QVulkanInstance> +#endif + +#include "common/common_types.h" +#include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" #include "ui_configure_graphics.h" @@ -51,10 +58,18 @@ Resolution FromResolutionFactor(float factor) { ConfigureGraphics::ConfigureGraphics(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureGraphics) { + vulkan_device = Settings::values.vulkan_device; + RetrieveVulkanDevices(); + ui->setupUi(this); SetConfiguration(); + connect(ui->api, static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this, + [this] { UpdateDeviceComboBox(); }); + connect(ui->device, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this, + [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); if (!new_bg_color.isValid()) { @@ -64,13 +79,25 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) }); } +void ConfigureGraphics::UpdateDeviceSelection(int device) { + if (device == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + vulkan_device = device; + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + ui->api->setEnabled(runtime_lock); + ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); + ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); @@ -80,11 +107,15 @@ void ConfigureGraphics::SetConfiguration() { ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue)); + UpdateDeviceComboBox(); } void ConfigureGraphics::ApplyConfiguration() { + Settings::values.renderer_backend = GetCurrentGraphicsBackend(); + Settings::values.vulkan_device = vulkan_device; Settings::values.resolution_factor = ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); + Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_asynchronous_gpu_emulation = @@ -116,3 +147,68 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { const QIcon color_icon(pixmap); ui->bg_button->setIcon(color_icon); } + +void ConfigureGraphics::UpdateDeviceComboBox() { + ui->device->clear(); + + bool enabled = false; + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + ui->device->addItem(tr("OpenGL Graphics Device")); + enabled = false; + break; + case Settings::RendererBackend::Vulkan: + for (const auto device : vulkan_devices) { + ui->device->addItem(device); + } + ui->device->setCurrentIndex(vulkan_device); + enabled = !vulkan_devices.empty(); + break; + } + ui->device->setEnabled(enabled && 
!Core::System::GetInstance().IsPoweredOn()); +} + +void ConfigureGraphics::RetrieveVulkanDevices() { +#ifdef HAS_VULKAN + QVulkanInstance instance; + instance.setApiVersion(QVersionNumber(1, 1, 0)); + if (!instance.create()) { + LOG_INFO(Frontend, "Vulkan 1.1 not available"); + return; + } + const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( + instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))}; + if (vkEnumeratePhysicalDevices == nullptr) { + LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices"); + return; + } + u32 physical_device_count; + if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) != + VK_SUCCESS) { + LOG_INFO(Frontend, "Failed to get physical devices count"); + return; + } + std::vector<VkPhysicalDevice> physical_devices(physical_device_count); + if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, + physical_devices.data()) != VK_SUCCESS) { + LOG_INFO(Frontend, "Failed to get physical devices"); + return; + } + + const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>( + instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))}; + if (vkGetPhysicalDeviceProperties == nullptr) { + LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties"); + return; + } + for (const auto physical_device : physical_devices) { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(physical_device, &properties); + vulkan_devices.push_back(QString::fromUtf8(properties.deviceName)); + } +#endif +} + +Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { + return static_cast<Settings::RendererBackend>(ui->api->currentIndex()); +} diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index fae28d98e..7e0596d9c 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -5,7 +5,10 @@ #pragma once #include <memory> +#include <vector> +#include <QString> #include <QWidget> +#include "core/settings.h" namespace Ui { class ConfigureGraphics; @@ -27,7 +30,16 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); + void UpdateDeviceComboBox(); + void UpdateDeviceSelection(int device); + + void RetrieveVulkanDevices(); + + Settings::RendererBackend GetCurrentGraphicsBackend() const; std::unique_ptr<Ui::ConfigureGraphics> ui; QColor bg_color; + + std::vector<QString> vulkan_devices; + u32 vulkan_device{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 0309ee300..db60426ab 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -7,21 +7,69 @@ <x>0</x> <y>0</y> <width>400</width> - <height>300</height> + <height>321</height> </rect> </property> <property name="windowTitle"> <string>Form</string> </property> - <layout class="QVBoxLayout" name="verticalLayout"> + <layout class="QVBoxLayout" name="verticalLayout_1"> <item> - <layout class="QVBoxLayout" name="verticalLayout_3"> + <layout class="QVBoxLayout" name="verticalLayout_2"> + <item> + <widget class="QGroupBox" name="groupBox_2"> + <property name="title"> + <string>API Settings</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_3"> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_4"> + <item> + <widget class="QLabel" name="label_2"> + <property 
name="text"> + <string>API:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="api"> + <item> + <property name="text"> + <string notr="true">OpenGL</string> + </property> + </item> + <item> + <property name="text"> + <string notr="true">Vulkan</string> + </property> + </item> + </widget> + </item> + </layout> + </item> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_5"> + <item> + <widget class="QLabel" name="label_3"> + <property name="text"> + <string>Device:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="device"/> + </item> + </layout> + </item> + </layout> + </widget> + </item> <item> <widget class="QGroupBox" name="groupBox"> <property name="title"> - <string>Graphics</string> + <string>Graphics Settings</string> </property> - <layout class="QVBoxLayout" name="verticalLayout_2"> + <layout class="QVBoxLayout" name="verticalLayout_4"> <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> @@ -30,16 +78,16 @@ </widget> </item> <item> - <widget class="QCheckBox" name="use_accurate_gpu_emulation"> + <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> <property name="text"> - <string>Use accurate GPU emulation (slow)</string> + <string>Use asynchronous GPU emulation</string> </property> </widget> </item> <item> - <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> + <widget class="QCheckBox" name="use_accurate_gpu_emulation"> <property name="text"> - <string>Use asynchronous GPU emulation</string> + <string>Use accurate GPU emulation (slow)</string> </property> </widget> </item> @@ -51,11 +99,11 @@ </widget> </item> <item> - <layout class="QHBoxLayout" name="horizontalLayout"> + <layout class="QHBoxLayout" name="horizontalLayout_2"> <item> <widget class="QLabel" name="label"> <property name="text"> - <string>Internal Resolution</string> + <string>Internal Resolution:</string> </property> </widget> </item> @@ -93,6 +141,41 @@ <item> <layout class="QHBoxLayout" name="horizontalLayout_6"> <item> + <widget class="QLabel" name="ar_label"> + <property name="text"> + <string>Aspect Ratio:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="aspect_ratio_combobox"> + <item> + <property name="text"> + <string>Default (16:9)</string> + </property> + </item> + <item> + <property name="text"> + <string>Force 4:3</string> + </property> + </item> + <item> + <property name="text"> + <string>Force 21:9</string> + </property> + </item> + <item> + <property name="text"> + <string>Stretch to Window</string> + </property> + </item> + </widget> + </item> + </layout> + </item> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_3"> + <item> <widget class="QLabel" name="bg_label"> <property name="text"> <string>Background Color:</string> diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 67c9a7c6d..96dec50e2 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -236,6 +236,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i widget->setVisible(false); analog_map_stick = {ui->buttonLStickAnalog, ui->buttonRStickAnalog}; + analog_map_deadzone = {ui->sliderLStickDeadzone, ui->sliderRStickDeadzone}; + analog_map_deadzone_label = {ui->labelLStickDeadzone, ui->labelRStickDeadzone}; for (int button_id = 0; button_id < Settings::NativeButton::NumButtons; 
button_id++) { auto* const button = button_map[button_id]; @@ -326,6 +328,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i InputCommon::Polling::DeviceType::Analog); } }); + connect(analog_map_deadzone[analog_id], &QSlider::valueChanged, [=] { + const float deadzone = analog_map_deadzone[analog_id]->value() / 100.0f; + analog_map_deadzone_label[analog_id]->setText(tr("Deadzone: %1").arg(deadzone)); + analogs_param[analog_id].Set("deadzone", deadzone); + }); } connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); @@ -484,7 +491,7 @@ void ConfigureInputPlayer::ClearAll() { continue; } - analogs_param[analog_id].Erase(analog_sub_buttons[sub_button_id]); + analogs_param[analog_id].Clear(); } } @@ -508,6 +515,23 @@ void ConfigureInputPlayer::UpdateButtonLabels() { AnalogToText(analogs_param[analog_id], analog_sub_buttons[sub_button_id])); } analog_map_stick[analog_id]->setText(tr("Set Analog Stick")); + + auto& param = analogs_param[analog_id]; + auto* const analog_deadzone_slider = analog_map_deadzone[analog_id]; + auto* const analog_deadzone_label = analog_map_deadzone_label[analog_id]; + + if (param.Has("engine") && param.Get("engine", "") == "sdl") { + if (!param.Has("deadzone")) { + param.Set("deadzone", 0.1f); + } + + analog_deadzone_slider->setValue(static_cast<int>(param.Get("deadzone", 0.1f) * 100)); + analog_deadzone_slider->setVisible(true); + analog_deadzone_label->setVisible(true); + } else { + analog_deadzone_slider->setVisible(false); + analog_deadzone_label->setVisible(false); + } } } diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index c66027651..045704e47 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -97,6 +97,8 @@ private: /// Analog inputs are also represented each with a single button, used to configure with an /// actual analog stick std::array<QPushButton*, Settings::NativeAnalog::NumAnalogs> analog_map_stick; + std::array<QSlider*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone; + std::array<QLabel*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone_label; static const std::array<std::string, ANALOG_SUB_BUTTONS_NUM> analog_sub_buttons; diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui index 42db020be..1556481d0 100644 --- a/src/yuzu/configuration/configure_input_player.ui +++ b/src/yuzu/configuration/configure_input_player.ui @@ -170,6 +170,44 @@ </item> </layout> </item> + <item row="4" column="0" colspan="2"> + <layout class="QVBoxLayout" name="sliderRStickDeadzoneVerticalLayout"> + <item> + <layout class="QHBoxLayout" name="sliderRStickDeadzoneHorizontalLayout"> + <item> + <widget class="QLabel" name="labelRStickDeadzone"> + <property name="text"> + <string>Deadzone: 0</string> + </property> + <property name="alignment"> + <enum>Qt::AlignHCenter</enum> + </property> + </widget> + </item> + </layout> + </item> + <item> + <widget class="QSlider" name="sliderRStickDeadzone"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + </widget> + </item> + </layout> + </item> + <item row="5" column="0"> + <spacer name="RStick_verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + </spacer> + </item> </layout> </widget> </item> @@ -745,6 
+783,47 @@ </item> </layout> </item> + <item row="5" column="1" colspan="2"> + <layout class="QVBoxLayout" name="sliderLStickDeadzoneVerticalLayout"> + <property name="sizeConstraint"> + <enum>QLayout::SetDefaultConstraint</enum> + </property> + <item> + <layout class="QHBoxLayout" name="sliderLStickDeadzoneHorizontalLayout"> + <item> + <widget class="QLabel" name="labelLStickDeadzone"> + <property name="text"> + <string>Deadzone: 0</string> + </property> + <property name="alignment"> + <enum>Qt::AlignHCenter</enum> + </property> + </widget> + </item> + </layout> + </item> + <item> + <widget class="QSlider" name="sliderLStickDeadzone"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + </widget> + </item> + </layout> + </item> + <item row="6" column="1"> + <spacer name="LStick_verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + </spacer> + </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_ui.cpp index e43e84d39..94424ee44 100644 --- a/src/yuzu/configuration/configure_gamelist.cpp +++ b/src/yuzu/configuration/configure_ui.cpp @@ -7,8 +7,8 @@ #include "common/common_types.h" #include "core/settings.h" -#include "ui_configure_gamelist.h" -#include "yuzu/configuration/configure_gamelist.h" +#include "ui_configure_ui.h" +#include "yuzu/configuration/configure_ui.h" #include "yuzu/uisettings.h" namespace { @@ -26,36 +26,39 @@ constexpr std::array row_text_names{ }; } // Anonymous namespace -ConfigureGameList::ConfigureGameList(QWidget* parent) - : QWidget(parent), ui(new Ui::ConfigureGameList) { +ConfigureUi::ConfigureUi(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureUi) { ui->setupUi(this); + for (const auto& theme : UISettings::themes) { + ui->theme_combobox->addItem(QString::fromUtf8(theme.first), + QString::fromUtf8(theme.second)); + } + InitializeIconSizeComboBox(); InitializeRowComboBoxes(); SetConfiguration(); // Force game list reload if any of the relevant settings are changed. - connect(ui->show_unknown, &QCheckBox::stateChanged, this, - &ConfigureGameList::RequestGameListUpdate); connect(ui->icon_size_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, - &ConfigureGameList::RequestGameListUpdate); + &ConfigureUi::RequestGameListUpdate); connect(ui->row_1_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, - &ConfigureGameList::RequestGameListUpdate); + &ConfigureUi::RequestGameListUpdate); connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, - &ConfigureGameList::RequestGameListUpdate); + &ConfigureUi::RequestGameListUpdate); // Update text ComboBoxes after user interaction. 
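Note: the new deadzone sliders in configure_input_player.cpp above store a 0.0 to 1.0 "deadzone" value into the analog ParamPackage (defaulting to 0.1 for SDL devices). How a backend applies that value is not part of this diff; a minimal sketch of the usual radial dead-zone math, under that assumption:

#include <algorithm>
#include <cmath>
#include <utility>

// Returns a filtered (x, y) stick position given a raw position in [-1, 1] and a deadzone in [0, 1).
std::pair<float, float> ApplyDeadzone(float x, float y, float deadzone) {
    const float magnitude = std::sqrt(x * x + y * y);
    if (magnitude < deadzone) {
        return {0.0f, 0.0f}; // inside the dead zone: report a centered stick
    }
    // Rescale so the output ramps from 0 just outside the dead zone up to 1 at full deflection.
    const float scale = (magnitude - deadzone) / (1.0f - deadzone) / magnitude;
    return {std::clamp(x * scale, -1.0f, 1.0f), std::clamp(y * scale, -1.0f, 1.0f)};
}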
connect(ui->row_1_text_combobox, QOverload<int>::of(&QComboBox::activated), - [=]() { ConfigureGameList::UpdateSecondRowComboBox(); }); + [=]() { ConfigureUi::UpdateSecondRowComboBox(); }); connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::activated), - [=]() { ConfigureGameList::UpdateFirstRowComboBox(); }); + [=]() { ConfigureUi::UpdateFirstRowComboBox(); }); } -ConfigureGameList::~ConfigureGameList() = default; +ConfigureUi::~ConfigureUi() = default; -void ConfigureGameList::ApplyConfiguration() { - UISettings::values.show_unknown = ui->show_unknown->isChecked(); +void ConfigureUi::ApplyConfiguration() { + UISettings::values.theme = + ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); UISettings::values.show_add_ons = ui->show_add_ons->isChecked(); UISettings::values.icon_size = ui->icon_size_combobox->currentData().toUInt(); UISettings::values.row_1_text_id = ui->row_1_text_combobox->currentData().toUInt(); @@ -63,18 +66,18 @@ void ConfigureGameList::ApplyConfiguration() { Settings::Apply(); } -void ConfigureGameList::RequestGameListUpdate() { +void ConfigureUi::RequestGameListUpdate() { UISettings::values.is_game_list_reload_pending.exchange(true); } -void ConfigureGameList::SetConfiguration() { - ui->show_unknown->setChecked(UISettings::values.show_unknown); +void ConfigureUi::SetConfiguration() { + ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); ui->show_add_ons->setChecked(UISettings::values.show_add_ons); ui->icon_size_combobox->setCurrentIndex( ui->icon_size_combobox->findData(UISettings::values.icon_size)); } -void ConfigureGameList::changeEvent(QEvent* event) { +void ConfigureUi::changeEvent(QEvent* event) { if (event->type() == QEvent::LanguageChange) { RetranslateUI(); } @@ -82,7 +85,7 @@ void ConfigureGameList::changeEvent(QEvent* event) { QWidget::changeEvent(event); } -void ConfigureGameList::RetranslateUI() { +void ConfigureUi::RetranslateUI() { ui->retranslateUi(this); for (int i = 0; i < ui->icon_size_combobox->count(); i++) { @@ -97,18 +100,18 @@ void ConfigureGameList::RetranslateUI() { } } -void ConfigureGameList::InitializeIconSizeComboBox() { +void ConfigureUi::InitializeIconSizeComboBox() { for (const auto& size : default_icon_sizes) { ui->icon_size_combobox->addItem(QString::fromUtf8(size.second), size.first); } } -void ConfigureGameList::InitializeRowComboBoxes() { +void ConfigureUi::InitializeRowComboBoxes() { UpdateFirstRowComboBox(true); UpdateSecondRowComboBox(true); } -void ConfigureGameList::UpdateFirstRowComboBox(bool init) { +void ConfigureUi::UpdateFirstRowComboBox(bool init) { const int currentIndex = init ? UISettings::values.row_1_text_id : ui->row_1_text_combobox->findData(ui->row_1_text_combobox->currentData()); @@ -127,7 +130,7 @@ void ConfigureGameList::UpdateFirstRowComboBox(bool init) { ui->row_1_text_combobox->findData(ui->row_2_text_combobox->currentData())); } -void ConfigureGameList::UpdateSecondRowComboBox(bool init) { +void ConfigureUi::UpdateSecondRowComboBox(bool init) { const int currentIndex = init ? 
UISettings::values.row_2_text_id : ui->row_2_text_combobox->findData(ui->row_2_text_combobox->currentData()); diff --git a/src/yuzu/configuration/configure_gamelist.h b/src/yuzu/configuration/configure_ui.h index ecd3fa174..d471afe99 100644 --- a/src/yuzu/configuration/configure_gamelist.h +++ b/src/yuzu/configuration/configure_ui.h @@ -8,15 +8,15 @@ #include <QWidget> namespace Ui { -class ConfigureGameList; +class ConfigureUi; } -class ConfigureGameList : public QWidget { +class ConfigureUi : public QWidget { Q_OBJECT public: - explicit ConfigureGameList(QWidget* parent = nullptr); - ~ConfigureGameList() override; + explicit ConfigureUi(QWidget* parent = nullptr); + ~ConfigureUi() override; void ApplyConfiguration(); @@ -34,5 +34,5 @@ private: void UpdateFirstRowComboBox(bool init = false); void UpdateSecondRowComboBox(bool init = false); - std::unique_ptr<Ui::ConfigureGameList> ui; + std::unique_ptr<Ui::ConfigureUi> ui; }; diff --git a/src/yuzu/configuration/configure_gamelist.ui b/src/yuzu/configuration/configure_ui.ui index 7a69377e7..bd5c5d3c2 100644 --- a/src/yuzu/configuration/configure_gamelist.ui +++ b/src/yuzu/configuration/configure_ui.ui @@ -1,7 +1,7 @@ <?xml version="1.0" encoding="UTF-8"?> <ui version="4.0"> - <class>ConfigureGameList</class> - <widget class="QWidget" name="ConfigureGameList"> + <class>ConfigureUi</class> + <widget class="QWidget" name="ConfigureUi"> <property name="geometry"> <rect> <x>0</x> @@ -21,22 +21,22 @@ <property name="title"> <string>General</string> </property> - <layout class="QHBoxLayout" name="GeneralHorizontalLayout"> + <layout class="QHBoxLayout" name="horizontalLayout"> <item> - <layout class="QVBoxLayout" name="GeneralVerticalLayout"> + <layout class="QVBoxLayout" name="verticalLayout"> <item> - <widget class="QCheckBox" name="show_unknown"> - <property name="text"> - <string>Show files with type 'Unknown'</string> - </property> - </widget> - </item> - <item> - <widget class="QCheckBox" name="show_add_ons"> - <property name="text"> - <string>Show Add-Ons Column</string> - </property> - </widget> + <layout class="QHBoxLayout" name="horizontalLayout_3"> + <item> + <widget class="QLabel" name="theme_label"> + <property name="text"> + <string>Theme:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="theme_combobox"/> + </item> + </layout> </item> </layout> </item> @@ -44,13 +44,20 @@ </widget> </item> <item> - <widget class="QGroupBox" name="IconSizeGroupBox"> + <widget class="QGroupBox" name="GameListGroupBox"> <property name="title"> - <string>Icon Size</string> + <string>Game List</string> </property> - <layout class="QHBoxLayout" name="icon_size_qhbox_layout"> + <layout class="QHBoxLayout" name="GameListHorizontalLayout"> <item> - <layout class="QVBoxLayout" name="icon_size_qvbox_layout"> + <layout class="QVBoxLayout" name="GeneralVerticalLayout"> + <item> + <widget class="QCheckBox" name="show_add_ons"> + <property name="text"> + <string>Show Add-Ons Column</string> + </property> + </widget> + </item> <item> <layout class="QHBoxLayout" name="icon_size_qhbox_layout_2"> <item> @@ -65,19 +72,6 @@ </item> </layout> </item> - </layout> - </item> - </layout> - </widget> - </item> - <item> - <widget class="QGroupBox" name="RowGroupBox"> - <property name="title"> - <string>Row Text</string> - </property> - <layout class="QHBoxLayout" name="RowHorizontalLayout"> - <item> - <layout class="QVBoxLayout" name="RowVerticalLayout"> <item> <layout class="QHBoxLayout" name="row_1_qhbox_layout"> <item> diff --git 
a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 727bd8a94..3f1a94627 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -12,8 +12,8 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" -#include "core/hle/kernel/wait_object.h" #include "core/memory.h" WaitTreeItem::WaitTreeItem() = default; @@ -133,8 +133,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons return list; } -WaitTreeWaitObject::WaitTreeWaitObject(const Kernel::WaitObject& o) : object(o) {} -WaitTreeWaitObject::~WaitTreeWaitObject() = default; +WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& o) + : object(o) {} +WaitTreeSynchronizationObject::~WaitTreeSynchronizationObject() = default; WaitTreeExpandableItem::WaitTreeExpandableItem() = default; WaitTreeExpandableItem::~WaitTreeExpandableItem() = default; @@ -143,25 +144,26 @@ bool WaitTreeExpandableItem::IsExpandable() const { return true; } -QString WaitTreeWaitObject::GetText() const { +QString WaitTreeSynchronizationObject::GetText() const { return tr("[%1]%2 %3") .arg(object.GetObjectId()) .arg(QString::fromStdString(object.GetTypeName()), QString::fromStdString(object.GetName())); } -std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitObject& object) { +std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::make( + const Kernel::SynchronizationObject& object) { switch (object.GetHandleType()) { case Kernel::HandleType::ReadableEvent: return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); case Kernel::HandleType::Thread: return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object)); default: - return std::make_unique<WaitTreeWaitObject>(object); + return std::make_unique<WaitTreeSynchronizationObject>(object); } } -std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() const { +std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChildren() const { std::vector<std::unique_ptr<WaitTreeItem>> list; const auto& threads = object.GetWaitingThreads(); @@ -173,8 +175,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() con return list; } -WaitTreeObjectList::WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::WaitObject>>& list, - bool w_all) +WaitTreeObjectList::WaitTreeObjectList( + const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, bool w_all) : object_list(list), wait_all(w_all) {} WaitTreeObjectList::~WaitTreeObjectList() = default; @@ -188,11 +190,12 @@ QString WaitTreeObjectList::GetText() const { std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeObjectList::GetChildren() const { std::vector<std::unique_ptr<WaitTreeItem>> list(object_list.size()); std::transform(object_list.begin(), object_list.end(), list.begin(), - [](const auto& t) { return WaitTreeWaitObject::make(*t); }); + [](const auto& t) { return WaitTreeSynchronizationObject::make(*t); }); return list; } -WaitTreeThread::WaitTreeThread(const Kernel::Thread& thread) : WaitTreeWaitObject(thread) {} +WaitTreeThread::WaitTreeThread(const Kernel::Thread& thread) + : WaitTreeSynchronizationObject(thread) {} WaitTreeThread::~WaitTreeThread() = default; QString WaitTreeThread::GetText() const { @@ -241,7 +244,8 
@@ QString WaitTreeThread::GetText() const { const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") .arg(context.pc, 8, 16, QLatin1Char{'0'}) .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); - return QStringLiteral("%1%2 (%3) ").arg(WaitTreeWaitObject::GetText(), pc_info, status); + return QStringLiteral("%1%2 (%3) ") + .arg(WaitTreeSynchronizationObject::GetText(), pc_info, status); } QColor WaitTreeThread::GetColor() const { @@ -273,7 +277,7 @@ QColor WaitTreeThread::GetColor() const { } std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { - std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren()); + std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeSynchronizationObject::GetChildren()); const auto& thread = static_cast<const Kernel::Thread&>(object); @@ -314,7 +318,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { } if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) { - list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjects(), + list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(), thread.IsSleepingOnWait())); } @@ -323,7 +327,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { return list; } -WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object) : WaitTreeWaitObject(object) {} +WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object) + : WaitTreeSynchronizationObject(object) {} WaitTreeEvent::~WaitTreeEvent() = default; WaitTreeThreadList::WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list) diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index 631274a5f..8e3bc4b24 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h @@ -19,7 +19,7 @@ class EmuThread; namespace Kernel { class HandleTable; class ReadableEvent; -class WaitObject; +class SynchronizationObject; class Thread; } // namespace Kernel @@ -99,35 +99,37 @@ private: const Kernel::Thread& thread; }; -class WaitTreeWaitObject : public WaitTreeExpandableItem { +class WaitTreeSynchronizationObject : public WaitTreeExpandableItem { Q_OBJECT public: - explicit WaitTreeWaitObject(const Kernel::WaitObject& object); - ~WaitTreeWaitObject() override; + explicit WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& object); + ~WaitTreeSynchronizationObject() override; - static std::unique_ptr<WaitTreeWaitObject> make(const Kernel::WaitObject& object); + static std::unique_ptr<WaitTreeSynchronizationObject> make( + const Kernel::SynchronizationObject& object); QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; protected: - const Kernel::WaitObject& object; + const Kernel::SynchronizationObject& object; }; class WaitTreeObjectList : public WaitTreeExpandableItem { Q_OBJECT public: - WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::WaitObject>>& list, bool wait_all); + WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, + bool wait_all); ~WaitTreeObjectList() override; QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; private: - const std::vector<std::shared_ptr<Kernel::WaitObject>>& object_list; + const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& object_list; bool wait_all; }; -class WaitTreeThread : public WaitTreeWaitObject { +class WaitTreeThread : public 
WaitTreeSynchronizationObject { Q_OBJECT public: explicit WaitTreeThread(const Kernel::Thread& thread); @@ -138,7 +140,7 @@ public: std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; }; -class WaitTreeEvent : public WaitTreeWaitObject { +class WaitTreeEvent : public WaitTreeSynchronizationObject { Q_OBJECT public: explicit WaitTreeEvent(const Kernel::ReadableEvent& object); diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index 4c81ef12b..da2c27aa2 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp @@ -298,8 +298,7 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa } const auto file_type = loader->GetFileType(); - if ((file_type == Loader::FileType::Unknown || file_type == Loader::FileType::Error) && - !UISettings::values.show_unknown) { + if (file_type == Loader::FileType::Unknown || file_type == Loader::FileType::Error) { return true; } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b5dd3e0d6..54ca2dc1d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -454,7 +454,6 @@ void GMainWindow::InitializeWidgets() { // Create status bar message_label = new QLabel(); // Configured separately for left alignment - message_label->setVisible(false); message_label->setFrameStyle(QFrame::NoFrame); message_label->setContentsMargins(4, 0, 4, 0); message_label->setAlignment(Qt::AlignLeft); @@ -476,8 +475,73 @@ void GMainWindow::InitializeWidgets() { label->setVisible(false); label->setFrameStyle(QFrame::NoFrame); label->setContentsMargins(4, 0, 4, 0); - statusBar()->addPermanentWidget(label, 0); + statusBar()->addPermanentWidget(label); } + + // Setup Dock button + dock_status_button = new QPushButton(); + dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + dock_status_button->setFocusPolicy(Qt::NoFocus); + connect(dock_status_button, &QPushButton::clicked, [&] { + Settings::values.use_docked_mode = !Settings::values.use_docked_mode; + dock_status_button->setChecked(Settings::values.use_docked_mode); + OnDockedModeChanged(!Settings::values.use_docked_mode, Settings::values.use_docked_mode); + }); + dock_status_button->setText(tr("DOCK")); + dock_status_button->setCheckable(true); + dock_status_button->setChecked(Settings::values.use_docked_mode); + statusBar()->insertPermanentWidget(0, dock_status_button); + + // Setup ASync button + async_status_button = new QPushButton(); + async_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + async_status_button->setFocusPolicy(Qt::NoFocus); + connect(async_status_button, &QPushButton::clicked, [&] { + if (emulation_running) { + return; + } + Settings::values.use_asynchronous_gpu_emulation = + !Settings::values.use_asynchronous_gpu_emulation; + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + Settings::Apply(); + }); + async_status_button->setText(tr("ASYNC")); + async_status_button->setCheckable(true); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + statusBar()->insertPermanentWidget(0, async_status_button); + + // Setup Renderer API button + renderer_status_button = new QPushButton(); + renderer_status_button->setObjectName(QStringLiteral("RendererStatusBarButton")); + renderer_status_button->setCheckable(true); + renderer_status_button->setFocusPolicy(Qt::NoFocus); + connect(renderer_status_button, &QPushButton::toggled, [=](bool checked) { + renderer_status_button->setText(checked ? 
tr("VULKAN") : tr("OPENGL")); + }); + renderer_status_button->toggle(); + +#ifndef HAS_VULKAN + renderer_status_button->setChecked(false); + renderer_status_button->setCheckable(false); + renderer_status_button->setDisabled(true); +#else + renderer_status_button->setChecked(Settings::values.renderer_backend == + Settings::RendererBackend::Vulkan); + connect(renderer_status_button, &QPushButton::clicked, [=] { + if (emulation_running) { + return; + } + if (renderer_status_button->isChecked()) { + Settings::values.renderer_backend = Settings::RendererBackend::Vulkan; + } else { + Settings::values.renderer_backend = Settings::RendererBackend::OpenGL; + } + + Settings::Apply(); + }); +#endif // HAS_VULKAN + statusBar()->insertPermanentWidget(0, renderer_status_button); + statusBar()->setVisible(true); setStyleSheet(QStringLiteral("QStatusBar::item{border: none;}")); } @@ -640,6 +704,7 @@ void GMainWindow::InitializeHotkeys() { Settings::values.use_docked_mode = !Settings::values.use_docked_mode; OnDockedModeChanged(!Settings::values.use_docked_mode, Settings::values.use_docked_mode); + dock_status_button->setChecked(Settings::values.use_docked_mode); }); } @@ -806,70 +871,12 @@ void GMainWindow::AllowOSSleep() { #endif } -QStringList GMainWindow::GetUnsupportedGLExtensions() { - QStringList unsupported_ext; - - if (!GLAD_GL_ARB_buffer_storage) { - unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); - } - if (!GLAD_GL_ARB_direct_state_access) { - unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); - } - if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) { - unsupported_ext.append(QStringLiteral("ARB_vertex_type_10f_11f_11f_rev")); - } - if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) { - unsupported_ext.append(QStringLiteral("ARB_texture_mirror_clamp_to_edge")); - } - if (!GLAD_GL_ARB_multi_bind) { - unsupported_ext.append(QStringLiteral("ARB_multi_bind")); - } - if (!GLAD_GL_ARB_clip_control) { - unsupported_ext.append(QStringLiteral("ARB_clip_control")); - } - - // Extensions required to support some texture formats. - if (!GLAD_GL_EXT_texture_compression_s3tc) { - unsupported_ext.append(QStringLiteral("EXT_texture_compression_s3tc")); - } - if (!GLAD_GL_ARB_texture_compression_rgtc) { - unsupported_ext.append(QStringLiteral("ARB_texture_compression_rgtc")); - } - if (!GLAD_GL_ARB_depth_buffer_float) { - unsupported_ext.append(QStringLiteral("ARB_depth_buffer_float")); - } - - for (const QString& ext : unsupported_ext) { - LOG_CRITICAL(Frontend, "Unsupported GL extension: {}", ext.toStdString()); - } - - return unsupported_ext; -} - bool GMainWindow::LoadROM(const QString& filename) { // Shutdown previous session if the emu thread is still active... if (emu_thread != nullptr) ShutdownGame(); - render_window->InitRenderTarget(); - - { - Core::Frontend::ScopeAcquireWindowContext acquire_context{*render_window}; - if (!gladLoadGL()) { - QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"), - tr("Your GPU may not support OpenGL 4.3, or you do not " - "have the latest graphics driver.")); - return false; - } - } - - const QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions(); - if (!unsupported_gl_extensions.empty()) { - QMessageBox::critical(this, tr("Error while initializing OpenGL Core!"), - tr("Your GPU may not support one or more required OpenGL" - "extensions. 
Please ensure you have the latest graphics " - "driver.<br><br>Unsupported extensions:<br>") + - unsupported_gl_extensions.join(QStringLiteral("<br>"))); + if (!render_window->InitRenderTarget()) { return false; } @@ -980,7 +987,9 @@ void GMainWindow::BootGame(const QString& filename) { // Create and start the emulation thread emu_thread = std::make_unique<EmuThread>(render_window); emit EmulationStarting(emu_thread.get()); - render_window->moveContext(); + if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { + render_window->moveContext(); + } emu_thread->start(); connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); @@ -1000,6 +1009,8 @@ void GMainWindow::BootGame(const QString& filename) { game_list_placeholder->hide(); } status_bar_update_timer.start(2000); + async_status_button->setDisabled(true); + renderer_status_button->setDisabled(true); const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); @@ -1065,10 +1076,13 @@ void GMainWindow::ShutdownGame() { // Disable status bar updates status_bar_update_timer.stop(); - message_label->setVisible(false); emu_speed_label->setVisible(false); game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); + async_status_button->setEnabled(true); +#ifdef HAS_VULKAN + renderer_status_button->setEnabled(true); +#endif emulation_running = false; @@ -1836,6 +1850,13 @@ void GMainWindow::OnConfigure() { } config->Save(); + + dock_status_button->setChecked(Settings::values.use_docked_mode); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); +#ifdef HAS_VULKAN + renderer_status_button->setChecked(Settings::values.renderer_backend == + Settings::RendererBackend::Vulkan); +#endif } void GMainWindow::OnLoadAmiibo() { @@ -2028,7 +2049,6 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det if (emu_thread) { emu_thread->SetRunning(true); message_label->setText(status_message); - message_label->setVisible(true); } } } @@ -2195,6 +2215,18 @@ void GMainWindow::closeEvent(QCloseEvent* event) { QWidget::closeEvent(event); } +void GMainWindow::keyPressEvent(QKeyEvent* event) { + if (render_window) { + render_window->ForwardKeyPressEvent(event); + } +} + +void GMainWindow::keyReleaseEvent(QKeyEvent* event) { + if (render_window) { + render_window->ForwardKeyReleaseEvent(event); + } +} + static bool IsSingleFileDropEvent(QDropEvent* event) { const QMimeData* mimeData = event->mimeData(); return mimeData->hasUrls() && mimeData->urls().length() == 1; @@ -2227,18 +2259,6 @@ void GMainWindow::dragMoveEvent(QDragMoveEvent* event) { event->acceptProposedAction(); } -void GMainWindow::keyPressEvent(QKeyEvent* event) { - if (render_window) { - render_window->ForwardKeyPressEvent(event); - } -} - -void GMainWindow::keyReleaseEvent(QKeyEvent* event) { - if (render_window) { - render_window->ForwardKeyReleaseEvent(event); - } -} - bool GMainWindow::ConfirmChangeGame() { if (emu_thread == nullptr) return true; @@ -2290,8 +2310,16 @@ void GMainWindow::UpdateUITheme() { QStringList theme_paths(default_theme_paths); if (is_default_theme || current_theme.isEmpty()) { - qApp->setStyleSheet({}); - setStyleSheet({}); + const QString theme_uri(QStringLiteral(":default/style.qss")); + QFile f(theme_uri); + if (f.open(QFile::ReadOnly | QFile::Text)) { + QTextStream ts(&f); + qApp->setStyleSheet(ts.readAll()); + setStyleSheet(ts.readAll()); + } else { + qApp->setStyleSheet({}); + setStyleSheet({}); + } theme_paths.append(default_icons); 
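Note: the DOCK, ASYNC and renderer buttons added in GMainWindow::InitializeWidgets above all follow the same togglable status-bar button pattern. Reduced here to a single boolean setting (helper name and signature are illustrative only):

#include <QPushButton>
#include <QStatusBar>
#include <QString>

QPushButton* MakeToggleButton(QStatusBar* bar, const QString& text, bool* setting) {
    auto* button = new QPushButton(text);
    button->setObjectName(QStringLiteral("TogglableStatusBarButton")); // picked up by the style sheet
    button->setCheckable(true);
    button->setChecked(*setting);
    button->setFocusPolicy(Qt::NoFocus);
    QObject::connect(button, &QPushButton::clicked, [button, setting] {
        *setting = !*setting;         // flip the backing setting...
        button->setChecked(*setting); // ...and keep the button state in sync with it
    });
    bar->insertPermanentWidget(0, button);
    return button;
}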
QIcon::setThemeName(default_icons); } else { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a56f9a981..8eba2172c 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -27,6 +27,7 @@ class LoadingScreen; class MicroProfileDialog; class ProfilerWidget; class QLabel; +class QPushButton; class WaitTreeWidget; enum class GameListOpenTarget; class GameListPlaceholder; @@ -130,7 +131,6 @@ private: void PreventOSSleep(); void AllowOSSleep(); - QStringList GetUnsupportedGLExtensions(); bool LoadROM(const QString& filename); void BootGame(const QString& filename); void ShutdownGame(); @@ -229,6 +229,9 @@ private: QLabel* emu_speed_label = nullptr; QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; + QPushButton* async_status_button = nullptr; + QPushButton* renderer_status_button = nullptr; + QPushButton* dock_status_button = nullptr; QTimer status_bar_update_timer; std::unique_ptr<Config> config; diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index bc7725a01..a675ecf4d 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -89,7 +89,6 @@ struct Values { int profile_index; // Game List - bool show_unknown; bool show_add_ons; uint32_t icon_size; uint8_t row_1_text_id; diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt index b5f06ab9e..a15719a0f 100644 --- a/src/yuzu_cmd/CMakeLists.txt +++ b/src/yuzu_cmd/CMakeLists.txt @@ -8,11 +8,22 @@ add_executable(yuzu-cmd emu_window/emu_window_sdl2_gl.h emu_window/emu_window_sdl2.cpp emu_window/emu_window_sdl2.h + emu_window/emu_window_sdl2_gl.cpp + emu_window/emu_window_sdl2_gl.h resource.h yuzu.cpp yuzu.rc ) +if (ENABLE_VULKAN) + target_sources(yuzu-cmd PRIVATE + emu_window/emu_window_sdl2_vk.cpp + emu_window/emu_window_sdl2_vk.h) + + target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include) + target_compile_definitions(yuzu-cmd PRIVATE HAS_VULKAN) +endif() + create_target_directory_groups(yuzu-cmd) target_link_libraries(yuzu-cmd PRIVATE common core input_common) diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 1a812cb87..96f1ce3af 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -12,6 +12,7 @@ #include "core/hle/service/acc/profile_manager.h" #include "core/settings.h" #include "input_common/main.h" +#include "input_common/udp/client.h" #include "yuzu_cmd/config.h" #include "yuzu_cmd/default_ini.h" @@ -297,6 +298,10 @@ void Config::ReadValues() { sdl2_config->GetInteger("ControlsGeneral", "touch_diameter_x", 15); Settings::values.touchscreen.diameter_y = sdl2_config->GetInteger("ControlsGeneral", "touch_diameter_y", 15); + Settings::values.udp_input_address = + sdl2_config->Get("Controls", "udp_input_address", InputCommon::CemuhookUDP::DEFAULT_ADDR); + Settings::values.udp_input_port = static_cast<u16>(sdl2_config->GetInteger( + "Controls", "udp_input_port", InputCommon::CemuhookUDP::DEFAULT_PORT)); std::transform(keyboard_keys.begin(), keyboard_keys.end(), Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam); @@ -366,8 +371,16 @@ void Config::ReadValues() { Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); // Renderer + const int renderer_backend = sdl2_config->GetInteger( + "Renderer", "backend", static_cast<int>(Settings::RendererBackend::OpenGL)); + Settings::values.renderer_backend = static_cast<Settings::RendererBackend>(renderer_backend); + Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); + 
Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); + Settings::values.resolution_factor = static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); + Settings::values.aspect_ratio = + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 8d18a4a5a..8a2b658cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -69,18 +69,46 @@ rstick= # - "motion_emu" (default) for emulating motion input from mouse input. Required parameters: # - "update_period": update period in milliseconds (default to 100) # - "sensitivity": the coefficient converting mouse movement to tilting angle (default to 0.01) +# - "cemuhookudp" reads motion input from a udp server that uses cemuhook's udp protocol motion_device= # for touch input, the following devices are available: # - "emu_window" (default) for emulating touch input from mouse input to the emulation window. No parameters required +# - "cemuhookudp" reads touch input from a udp server that uses cemuhook's udp protocol +# - "min_x", "min_y", "max_x", "max_y": defines the udp device's touch screen coordinate system touch_device= +# Most desktop operating systems do not expose a way to poll the motion state of the controllers +# so as a way around it, cemuhook created a udp client/server protocol to broadcast the data directly +# from a controller device to the client program. Citra has a client that can connect and read +# from any cemuhook compatible motion program. + +# IPv4 address of the udp input server (Default "127.0.0.1") +udp_input_address= + +# Port of the udp input server. (Default 26760) +udp_input_port= + +# The pad to request data on. Should be between 0 (Pad 1) and 3 (Pad 4). (Default 0) +udp_pad_index= + [Core] # Whether to use multi-core for CPU emulation # 0 (default): Disabled, 1: Enabled use_multi_core= [Renderer] +# Which backend API to use. +# 0 (default): OpenGL, 1: Vulkan +backend = + +# Enable graphics API debugging mode. +# 0 (default): Disabled, 1: Enabled +debug = + +# Which Vulkan physical device to use (defaults to 0) +vulkan_device = + # Whether to use software or hardware rendering. # 0: Software, 1 (default): Hardware use_hw_renderer = @@ -94,6 +122,10 @@ use_shader_jit = # factor for the Switch resolution resolution_factor = +# Aspect ratio +# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window +aspect_ratio = + # Whether to enable V-Sync (caps the framerate at 60FPS) or not. 
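Note: the new aspect_ratio key documented above is stored as a plain index (0-3); how the renderer turns it into an actual ratio is not shown in this diff. A sketch of the mapping implied by the UI strings, with the function name and the stretch handling being assumptions:

// Illustrative only: map the aspect_ratio setting to a width/height ratio.
float AspectRatioFromSetting(int aspect_ratio, float window_width, float window_height) {
    switch (aspect_ratio) {
    case 1:
        return 4.0f / 3.0f;                  // Force 4:3
    case 2:
        return 21.0f / 9.0f;                 // Force 21:9
    case 3:
        return window_width / window_height; // Stretch to Window
    default:
        return 16.0f / 9.0f;                 // Default (16:9)
    }
}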
# 0 (default): Off, 1: On use_vsync = diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index b1c512db1..e96139885 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -89,6 +89,10 @@ bool EmuWindow_SDL2::IsOpen() const { return is_open; } +bool EmuWindow_SDL2::IsShown() const { + return is_shown; +} + void EmuWindow_SDL2::OnResize() { int width, height; SDL_GetWindowSize(render_window, &width, &height); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h index eaa971f77..b38f56661 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h @@ -21,6 +21,9 @@ public: /// Whether the window is still open, and a close request hasn't yet been sent bool IsOpen() const; + /// Returns if window is shown (not minimized) + bool IsShown() const override; + protected: /// Called by PollEvents when a key is pressed or released. void OnKeyEvent(int key, u8 state); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 6fde694a2..7ffa0ac09 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -9,6 +9,7 @@ #include <SDL.h> #include <fmt/format.h> #include <glad/glad.h> +#include "common/assert.h" #include "common/logging/log.h" #include "common/scm_rev.h" #include "common/string_util.h" @@ -151,6 +152,12 @@ void EmuWindow_SDL2_GL::DoneCurrent() { SDL_GL_MakeCurrent(render_window, nullptr); } +void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const { + // Should not have been called from OpenGL + UNREACHABLE(); +} + std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { return std::make_unique<SDLGLContext>(); } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index 630deba93..c753085a8 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h @@ -22,6 +22,10 @@ public: /// Releases the GL context from the caller thread void DoneCurrent() override; + /// Ignored in OpenGL + void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const override; + std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private: diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp new file mode 100644 index 000000000..a203f0da9 --- /dev/null +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp @@ -0,0 +1,162 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
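Note: RetrieveVulkanHandlers, stubbed out for the OpenGL window above and implemented by the new Vulkan window below, hands the instance handles back through untyped pointers. A hypothetical caller on the renderer side would pass typed storage, assuming the method is declared on Core::Frontend::EmuWindow as the overrides suggest:

#include <vulkan/vulkan.h>
#include "core/frontend/emu_window.h"

void AcquireVulkanHandles(const Core::Frontend::EmuWindow& window) {
    PFN_vkGetInstanceProcAddr get_instance_proc_addr{};
    VkInstance instance{};
    VkSurfaceKHR surface{};
    // The window memcpy's each handle into the storage these pointers refer to.
    window.RetrieveVulkanHandlers(&get_instance_proc_addr, &instance, &surface);
    // get_instance_proc_addr can now load further instance-level entry points.
}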
+
+#include <algorithm>
+#include <string>
+#include <vector>
+#include <SDL.h>
+#include <SDL_vulkan.h>
+#include <fmt/format.h>
+#include <vulkan/vulkan.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/scm_rev.h"
+#include "core/settings.h"
+#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
+
+EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(bool fullscreen) : EmuWindow_SDL2(fullscreen) {
+    if (SDL_Vulkan_LoadLibrary(nullptr) != 0) {
+        LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError());
+        exit(EXIT_FAILURE);
+    }
+
+    vkGetInstanceProcAddr =
+        reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr());
+    if (vkGetInstanceProcAddr == nullptr) {
+        LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
+        exit(EXIT_FAILURE);
+    }
+
+    const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name,
+                                                 Common::g_scm_branch, Common::g_scm_desc);
+    render_window =
+        SDL_CreateWindow(window_title.c_str(),
+                         SDL_WINDOWPOS_UNDEFINED, // x position
+                         SDL_WINDOWPOS_UNDEFINED, // y position
+                         Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
+                         SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN);
+
+    const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr);
+
+    u32 extra_ext_count{};
+    if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) {
+        LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}",
+                     SDL_GetError());
+        exit(1);
+    }
+
+    auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count);
+    if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) {
+        LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError());
+        exit(1);
+    }
+    std::vector<const char*> enabled_extensions;
+    enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(),
+                              extra_ext_names.get() + extra_ext_count);
+
+    std::vector<const char*> enabled_layers;
+    if (use_standard_layers) {
+        enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+        enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation");
+    }
+
+    VkApplicationInfo app_info{};
+    app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+    app_info.apiVersion = VK_API_VERSION_1_1;
+    app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
+    app_info.pApplicationName = "yuzu-emu";
+    app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
+    app_info.pEngineName = "yuzu-emu";
+
+    VkInstanceCreateInfo instance_ci{};
+    instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    instance_ci.pApplicationInfo = &app_info;
+    instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size());
+    instance_ci.ppEnabledExtensionNames = enabled_extensions.data();
+    if (Settings::values.renderer_debug) {
+        instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size());
+        instance_ci.ppEnabledLayerNames = enabled_layers.data();
+    }
+
+    const auto vkCreateInstance =
+        reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance"));
+    if (vkCreateInstance == nullptr ||
+        vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) {
+        LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!");
+        exit(EXIT_FAILURE);
+    }
+
+    vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
+        vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance"));
+    if (vkDestroyInstance == nullptr) {
+        LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
+        exit(EXIT_FAILURE);
+    }
+
+    if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) {
+        LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError());
+        exit(EXIT_FAILURE);
+    }
+
+    OnResize();
+    OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
+    SDL_PumpEvents();
+    LOG_INFO(Frontend, "yuzu Version: {} | {}-{} (Vulkan)", Common::g_build_name,
+             Common::g_scm_branch, Common::g_scm_desc);
+}
+
+EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() {
+    vkDestroyInstance(vk_instance, nullptr);
+}
+
+void EmuWindow_SDL2_VK::SwapBuffers() {}
+
+void EmuWindow_SDL2_VK::MakeCurrent() {
+    // Unused on Vulkan
+}
+
+void EmuWindow_SDL2_VK::DoneCurrent() {
+    // Unused on Vulkan
+}
+
+void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
+                                               void* surface) const {
+    const auto instance_proc_addr = vkGetInstanceProcAddr;
+    std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
+    std::memcpy(instance, &vk_instance, sizeof(vk_instance));
+    std::memcpy(surface, &vk_surface, sizeof(vk_surface));
+}
+
+std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const {
+    return nullptr;
+}
+
+bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const {
+    if (!Settings::values.renderer_debug) {
+        return false;
+    }
+
+    const auto vkEnumerateInstanceLayerProperties =
+        reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>(
+            vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties"));
+    if (vkEnumerateInstanceLayerProperties == nullptr) {
+        LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
+        return false;
+    }
+
+    u32 available_layers_count{};
+    if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) {
+        LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
+        return false;
+    }
+    std::vector<VkLayerProperties> layers(available_layers_count);
+    if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) {
+        LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
+        return false;
+    }
+
+    return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) {
+               return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation");
+           }) != layers.end();
+}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h new file mode 100644 index 000000000..2a7c06a24 --- /dev/null +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h @@ -0,0 +1,39 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vulkan/vulkan.h>
+#include "core/frontend/emu_window.h"
+#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
+
+class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
+public:
+    explicit EmuWindow_SDL2_VK(bool fullscreen);
+    ~EmuWindow_SDL2_VK();
+
+    /// Swap buffers to display the next frame
+    void SwapBuffers() override;
+
+    /// Makes the graphics context current for the caller thread
+    void MakeCurrent() override;
+
+    /// Releases the GL context from the caller thread
+    void DoneCurrent() override;
+
+    /// Retrieves Vulkan specific handlers from the window
+    void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
+                                void* surface) const override;
+
+    std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
+
+private:
+    bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const;
+
+    VkInstance vk_instance{};
+    VkSurfaceKHR vk_surface{};
+
+    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+    PFN_vkDestroyInstance vkDestroyInstance{};
+};
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 3ee088a91..325795321 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -32,6 +32,9 @@
 #include "yuzu_cmd/config.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h"
+#ifdef HAS_VULKAN
+#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
+#endif
 
 #include "core/file_sys/registered_cache.h"
@@ -174,7 +177,20 @@ int main(int argc, char** argv) {
     Settings::values.use_gdbstub = use_gdbstub;
     Settings::Apply();
 
-    std::unique_ptr<EmuWindow_SDL2> emu_window{std::make_unique<EmuWindow_SDL2_GL>(fullscreen)};
+    std::unique_ptr<EmuWindow_SDL2> emu_window;
+    switch (Settings::values.renderer_backend) {
+    case Settings::RendererBackend::OpenGL:
+        emu_window = std::make_unique<EmuWindow_SDL2_GL>(fullscreen);
+        break;
+    case Settings::RendererBackend::Vulkan:
+#ifdef HAS_VULKAN
+        emu_window = std::make_unique<EmuWindow_SDL2_VK>(fullscreen);
+        break;
+#else
+        LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!");
+        return 1;
+#endif
+    }
 
     if (!Settings::values.use_multi_core) {
         // Single core mode must acquire OpenGL context for entire emulation session
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 84ab4d687..0ac93b62a 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp @@ -118,6 +118,8 @@ void Config::ReadValues() {
     // Renderer
     Settings::values.resolution_factor =
         static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
+    Settings::values.aspect_ratio =
+        static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
     Settings::values.use_frame_limit = false;
     Settings::values.frame_limit = 100;
     Settings::values.use_disk_shader_cache =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 9a3e86d68..8d93f7b88 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h @@ -26,6 +26,10 @@ use_shader_jit =
 # factor for the Switch resolution
 resolution_factor =
 
+# Aspect ratio
+# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
+aspect_ratio =
+
 # Whether to enable V-Sync (caps the framerate at 60FPS) or not.
 # 0 (default): Off, 1: On
 use_vsync =
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index e7fe8decf..f2cc4a797 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp @@ -5,10 +5,15 @@
 #include <algorithm>
 #include <cstdlib>
 #include <string>
+
+#include <fmt/format.h>
+
 #define SDL_MAIN_HANDLED
 #include <SDL.h>
-#include <fmt/format.h>
+
 #include <glad/glad.h>
+
+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/scm_rev.h"
 #include "core/settings.h"
@@ -120,3 +125,11 @@ void EmuWindow_SDL2_Hide::MakeCurrent() {
 void EmuWindow_SDL2_Hide::DoneCurrent() {
     SDL_GL_MakeCurrent(render_window, nullptr);
 }
+
+bool EmuWindow_SDL2_Hide::IsShown() const {
+    return false;
+}
+
+void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const {
+    UNREACHABLE();
+}
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index 1a8953c75..c7fccc002 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h @@ -25,6 +25,13 @@ public:
     /// Releases the GL context from the caller thread
     void DoneCurrent() override;
 
+    /// Whether the screen is being shown or not.
+    bool IsShown() const override;
+
+    /// Retrieves Vulkan specific handlers from the window
+    void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
+                                void* surface) const override;
+
     /// Whether the window is still open, and a close request hasn't yet been sent
     bool IsOpen() const;
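For context on the pattern the new EmuWindow_SDL2_VK frontend follows, below is a minimal standalone sketch of SDL2-driven Vulkan bootstrap: SDL loads the Vulkan library and hands out vkGetInstanceProcAddr (so the frontend never links the loader directly), reports which instance extensions the window surface needs, and creates the VkSurfaceKHR once the instance exists. This sketch is illustrative only and is not part of the commit; the window title, dimensions, and bare-bones error handling are placeholder choices.

// Illustrative sketch only (not part of the commit): SDL2 + Vulkan instance/surface bootstrap.
// All Vulkan entry points are fetched through SDL's vkGetInstanceProcAddr, as the commit does.
#define SDL_MAIN_HANDLED
#include <cstdio>
#include <vector>
#include <SDL.h>
#include <SDL_vulkan.h>
#include <vulkan/vulkan.h>

int main() {
    if (SDL_Init(SDL_INIT_VIDEO) != 0 || SDL_Vulkan_LoadLibrary(nullptr) != 0) {
        std::fprintf(stderr, "SDL/Vulkan init failed: %s\n", SDL_GetError());
        return 1;
    }

    // SDL owns the Vulkan loader; pull the entry point out instead of linking vulkan-1.
    const auto vkGetInstanceProcAddr =
        reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr());

    SDL_Window* window = SDL_CreateWindow("vk-sketch", SDL_WINDOWPOS_UNDEFINED,
                                          SDL_WINDOWPOS_UNDEFINED, 1280, 720, SDL_WINDOW_VULKAN);

    // Ask SDL which instance extensions its surface implementation requires.
    unsigned int ext_count = 0;
    SDL_Vulkan_GetInstanceExtensions(window, &ext_count, nullptr);
    std::vector<const char*> extensions(ext_count);
    SDL_Vulkan_GetInstanceExtensions(window, &ext_count, extensions.data());

    VkApplicationInfo app_info{};
    app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    app_info.apiVersion = VK_API_VERSION_1_1;

    VkInstanceCreateInfo instance_ci{};
    instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    instance_ci.pApplicationInfo = &app_info;
    instance_ci.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
    instance_ci.ppEnabledExtensionNames = extensions.data();

    const auto vkCreateInstance =
        reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance"));
    VkInstance instance = VK_NULL_HANDLE;
    if (vkCreateInstance == nullptr ||
        vkCreateInstance(&instance_ci, nullptr, &instance) != VK_SUCCESS) {
        std::fprintf(stderr, "vkCreateInstance failed\n");
        return 1;
    }

    // The surface is what the renderer's swapchain will later present to.
    VkSurfaceKHR surface = VK_NULL_HANDLE;
    if (!SDL_Vulkan_CreateSurface(window, instance, &surface)) {
        std::fprintf(stderr, "SDL_Vulkan_CreateSurface failed: %s\n", SDL_GetError());
        return 1;
    }

    // Teardown; a real frontend keeps the instance and surface alive for the renderer.
    const auto vkDestroySurfaceKHR = reinterpret_cast<PFN_vkDestroySurfaceKHR>(
        vkGetInstanceProcAddr(instance, "vkDestroySurfaceKHR"));
    const auto vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
        vkGetInstanceProcAddr(instance, "vkDestroyInstance"));
    vkDestroySurfaceKHR(instance, surface, nullptr);
    vkDestroyInstance(instance, nullptr);
    SDL_DestroyWindow(window);
    SDL_Vulkan_UnloadLibrary();
    SDL_Quit();
    return 0;
}

Because every Vulkan call above goes through the function pointer returned by SDL, the sketch (like the new window class) needs no link-time dependency on the Vulkan loader, only on SDL2 built with Vulkan support.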