diff options
Diffstat (limited to 'src')
65 files changed, 1478 insertions, 684 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1a3647a67..d342cafe0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -15,14 +15,14 @@ add_library(core STATIC constants.h core.cpp core.h - core_cpu.cpp - core_cpu.h + core_manager.cpp + core_manager.h core_timing.cpp core_timing.h core_timing_util.cpp core_timing_util.h - cpu_core_manager.cpp - cpu_core_manager.h + cpu_manager.cpp + cpu_manager.h crypto/aes_util.cpp crypto/aes_util.h crypto/encryption_layer.cpp @@ -158,6 +158,8 @@ add_library(core STATIC hle/kernel/mutex.h hle/kernel/object.cpp hle/kernel/object.h + hle/kernel/physical_core.cpp + hle/kernel/physical_core.h hle/kernel/process.cpp hle/kernel/process.h hle/kernel/process_capability.cpp diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index e825c0526..791640a3a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -10,11 +10,12 @@ #include "common/microprofile.h" #include "core/arm/dynarmic/arm_dynarmic.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" @@ -87,7 +88,7 @@ public: if (GDBStub::IsServerEnabled()) { parent.jit->HaltExecution(); parent.SetPC(pc); - Kernel::Thread* thread = Kernel::GetCurrentThread(); + Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); parent.SaveContext(thread->GetContext()); GDBStub::Break(); GDBStub::SendTrap(thread, 5); diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index abd59ff4b..94570e520 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -2,10 +2,24 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif #include "core/arm/exclusive_monitor.h" +#include "core/memory.h" namespace Core { ExclusiveMonitor::~ExclusiveMonitor() = default; +std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, + std::size_t num_cores) { +#ifdef ARCHITECTURE_x86_64 + return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores); +#else + // TODO(merry): Passthrough exclusive monitor + return nullptr; +#endif +} + } // namespace Core diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index f59aca667..4ef418b90 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -4,8 +4,14 @@ #pragma once +#include <memory> + #include "common/common_types.h" +namespace Memory { +class Memory; +} + namespace Core { class ExclusiveMonitor { @@ -22,4 +28,7 @@ public: virtual bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) = 0; }; +std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, + std::size_t num_cores); + } // namespace Core diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 48182c99a..f99ad5802 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -9,6 +9,7 @@ #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" namespace Core { @@ -177,7 +178,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); } - Kernel::Thread* thread = Kernel::GetCurrentThread(); + Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); SaveContext(thread->GetContext()); if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { last_bkpt_hit = false; diff --git a/src/core/core.cpp b/src/core/core.cpp index 10ed5219b..0eb0c0dca 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -11,9 +11,9 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" -#include "core/cpu_core_manager.h" +#include "core/cpu_manager.h" #include "core/file_sys/bis_factory.h" #include "core/file_sys/card_image.h" #include "core/file_sys/mode.h" @@ -28,6 +28,7 @@ #include "core/hardware_interrupt_manager.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" @@ -113,16 +114,25 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, struct System::Impl { explicit Impl(System& system) : kernel{system}, fs_controller{system}, memory{system}, - cpu_core_manager{system}, reporter{system}, applet_manager{system} {} + cpu_manager{system}, reporter{system}, applet_manager{system} {} - Cpu& CurrentCpuCore() { - return cpu_core_manager.GetCurrentCore(); + CoreManager& CurrentCoreManager() { + return cpu_manager.GetCurrentCoreManager(); + } + + Kernel::PhysicalCore& CurrentPhysicalCore() { + const auto index = cpu_manager.GetActiveCoreIndex(); + return kernel.PhysicalCore(index); + } + + Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) { + return kernel.PhysicalCore(index); } ResultStatus RunLoop(bool tight_loop) { status = ResultStatus::Success; - cpu_core_manager.RunLoop(tight_loop); + cpu_manager.RunLoop(tight_loop); return status; } @@ -131,8 +141,8 @@ struct System::Impl { LOG_DEBUG(HW_Memory, "initialized OK"); core_timing.Initialize(); - cpu_core_manager.Initialize(); kernel.Initialize(); + cpu_manager.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch()); @@ -205,7 +215,6 @@ struct System::Impl { // Main process has been loaded and been made current. // Begin GPU and CPU execution. gpu_core->Start(); - cpu_core_manager.StartThreads(); // Initialize cheat engine if (cheat_engine) { @@ -274,7 +283,7 @@ struct System::Impl { gpu_core.reset(); // Close all CPU/threading state - cpu_core_manager.Shutdown(); + cpu_manager.Shutdown(); // Shutdown kernel and core timing kernel.Shutdown(); @@ -344,7 +353,7 @@ struct System::Impl { std::unique_ptr<Tegra::GPU> gpu_core; std::unique_ptr<Hardware::InterruptManager> interrupt_manager; Memory::Memory memory; - CpuCoreManager cpu_core_manager; + CpuManager cpu_manager; bool is_powered_on = false; bool exit_lock = false; @@ -379,12 +388,12 @@ struct System::Impl { System::System() : impl{std::make_unique<Impl>(*this)} {} System::~System() = default; -Cpu& System::CurrentCpuCore() { - return impl->CurrentCpuCore(); +CoreManager& System::CurrentCoreManager() { + return impl->CurrentCoreManager(); } -const Cpu& System::CurrentCpuCore() const { - return impl->CurrentCpuCore(); +const CoreManager& System::CurrentCoreManager() const { + return impl->CurrentCoreManager(); } System::ResultStatus System::RunLoop(bool tight_loop) { @@ -396,7 +405,7 @@ System::ResultStatus System::SingleStep() { } void System::InvalidateCpuInstructionCaches() { - impl->cpu_core_manager.InvalidateAllInstructionCaches(); + impl->kernel.InvalidateAllInstructionCaches(); } System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) { @@ -408,13 +417,11 @@ bool System::IsPoweredOn() const { } void System::PrepareReschedule() { - CurrentCpuCore().PrepareReschedule(); + impl->CurrentPhysicalCore().Stop(); } void System::PrepareReschedule(const u32 core_index) { - if (core_index < GlobalScheduler().CpuCoresCount()) { - CpuCore(core_index).PrepareReschedule(); - } + impl->kernel.PrepareReschedule(core_index); } PerfStatsResults System::GetAndResetPerfStats() { @@ -430,31 +437,31 @@ const TelemetrySession& System::TelemetrySession() const { } ARM_Interface& System::CurrentArmInterface() { - return CurrentCpuCore().ArmInterface(); + return impl->CurrentPhysicalCore().ArmInterface(); } const ARM_Interface& System::CurrentArmInterface() const { - return CurrentCpuCore().ArmInterface(); + return impl->CurrentPhysicalCore().ArmInterface(); } std::size_t System::CurrentCoreIndex() const { - return CurrentCpuCore().CoreIndex(); + return impl->cpu_manager.GetActiveCoreIndex(); } Kernel::Scheduler& System::CurrentScheduler() { - return CurrentCpuCore().Scheduler(); + return impl->CurrentPhysicalCore().Scheduler(); } const Kernel::Scheduler& System::CurrentScheduler() const { - return CurrentCpuCore().Scheduler(); + return impl->CurrentPhysicalCore().Scheduler(); } Kernel::Scheduler& System::Scheduler(std::size_t core_index) { - return CpuCore(core_index).Scheduler(); + return impl->GetPhysicalCore(core_index).Scheduler(); } const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { - return CpuCore(core_index).Scheduler(); + return impl->GetPhysicalCore(core_index).Scheduler(); } /// Gets the global scheduler @@ -476,28 +483,28 @@ const Kernel::Process* System::CurrentProcess() const { } ARM_Interface& System::ArmInterface(std::size_t core_index) { - return CpuCore(core_index).ArmInterface(); + return impl->GetPhysicalCore(core_index).ArmInterface(); } const ARM_Interface& System::ArmInterface(std::size_t core_index) const { - return CpuCore(core_index).ArmInterface(); + return impl->GetPhysicalCore(core_index).ArmInterface(); } -Cpu& System::CpuCore(std::size_t core_index) { - return impl->cpu_core_manager.GetCore(core_index); +CoreManager& System::GetCoreManager(std::size_t core_index) { + return impl->cpu_manager.GetCoreManager(core_index); } -const Cpu& System::CpuCore(std::size_t core_index) const { +const CoreManager& System::GetCoreManager(std::size_t core_index) const { ASSERT(core_index < NUM_CPU_CORES); - return impl->cpu_core_manager.GetCore(core_index); + return impl->cpu_manager.GetCoreManager(core_index); } ExclusiveMonitor& System::Monitor() { - return impl->cpu_core_manager.GetExclusiveMonitor(); + return impl->kernel.GetExclusiveMonitor(); } const ExclusiveMonitor& System::Monitor() const { - return impl->cpu_core_manager.GetExclusiveMonitor(); + return impl->kernel.GetExclusiveMonitor(); } Memory::Memory& System::Memory() { diff --git a/src/core/core.h b/src/core/core.h index e240c5c58..e69d68fcf 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -93,7 +93,7 @@ class Memory; namespace Core { class ARM_Interface; -class Cpu; +class CoreManager; class ExclusiveMonitor; class FrameLimiter; class PerfStats; @@ -218,10 +218,10 @@ public: const ARM_Interface& ArmInterface(std::size_t core_index) const; /// Gets a CPU interface to the CPU core with the specified index - Cpu& CpuCore(std::size_t core_index); + CoreManager& GetCoreManager(std::size_t core_index); /// Gets a CPU interface to the CPU core with the specified index - const Cpu& CpuCore(std::size_t core_index) const; + const CoreManager& GetCoreManager(std::size_t core_index) const; /// Gets a reference to the exclusive monitor ExclusiveMonitor& Monitor(); @@ -364,10 +364,10 @@ private: System(); /// Returns the currently running CPU core - Cpu& CurrentCpuCore(); + CoreManager& CurrentCoreManager(); /// Returns the currently running CPU core - const Cpu& CurrentCpuCore() const; + const CoreManager& CurrentCoreManager() const; /** * Initialize the emulated system. diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp deleted file mode 100644 index 630cd4feb..000000000 --- a/src/core/core_cpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <condition_variable> -#include <mutex> - -#include "common/logging/log.h" -#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" -#endif -#include "core/arm/exclusive_monitor.h" -#include "core/arm/unicorn/arm_unicorn.h" -#include "core/core.h" -#include "core/core_cpu.h" -#include "core/core_timing.h" -#include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/lock.h" -#include "core/settings.h" - -namespace Core { - -void CpuBarrier::NotifyEnd() { - std::unique_lock lock{mutex}; - end = true; - condition.notify_all(); -} - -bool CpuBarrier::Rendezvous() { - if (!Settings::values.use_multi_core) { - // Meaningless when running in single-core mode - return true; - } - - if (!end) { - std::unique_lock lock{mutex}; - - --cores_waiting; - if (!cores_waiting) { - cores_waiting = NUM_CPU_CORES; - condition.notify_all(); - return true; - } - - condition.wait(lock); - return true; - } - - return false; -} - -Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, - std::size_t core_index) - : cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()}, - core_timing{system.CoreTiming()}, core_index{core_index} { -#ifdef ARCHITECTURE_x86_64 - arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); -#else - arm_interface = std::make_unique<ARM_Unicorn>(system); - LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); -#endif - - scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); -} - -Cpu::~Cpu() = default; - -std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor( - [[maybe_unused]] Memory::Memory& memory, [[maybe_unused]] std::size_t num_cores) { -#ifdef ARCHITECTURE_x86_64 - return std::make_unique<DynarmicExclusiveMonitor>(memory, num_cores); -#else - // TODO(merry): Passthrough exclusive monitor - return nullptr; -#endif -} - -void Cpu::RunLoop(bool tight_loop) { - // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step - if (!cpu_barrier.Rendezvous()) { - // If rendezvous failed, session has been killed - return; - } - - Reschedule(); - - // If we don't have a currently active thread then don't execute instructions, - // instead advance to the next event and try to yield to the next thread - if (Kernel::GetCurrentThread() == nullptr) { - LOG_TRACE(Core, "Core-{} idling", core_index); - core_timing.Idle(); - } else { - if (tight_loop) { - arm_interface->Run(); - } else { - arm_interface->Step(); - } - // We are stopping a run, exclusive state must be cleared - arm_interface->ClearExclusiveState(); - } - core_timing.Advance(); - - Reschedule(); -} - -void Cpu::SingleStep() { - return RunLoop(false); -} - -void Cpu::PrepareReschedule() { - arm_interface->PrepareReschedule(); -} - -void Cpu::Reschedule() { - // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock(HLE::g_hle_lock); - - global_scheduler.SelectThread(core_index); - scheduler->TryDoContextSwitch(); -} - -void Cpu::Shutdown() { - scheduler->Shutdown(); -} - -} // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h deleted file mode 100644 index 78f5021a2..000000000 --- a/src/core/core_cpu.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <atomic> -#include <condition_variable> -#include <cstddef> -#include <memory> -#include <mutex> -#include "common/common_types.h" - -namespace Kernel { -class GlobalScheduler; -class Scheduler; -} // namespace Kernel - -namespace Core { -class System; -} - -namespace Core::Timing { -class CoreTiming; -} - -namespace Memory { -class Memory; -} - -namespace Core { - -class ARM_Interface; -class ExclusiveMonitor; - -constexpr unsigned NUM_CPU_CORES{4}; - -class CpuBarrier { -public: - bool IsAlive() const { - return !end; - } - - void NotifyEnd(); - - bool Rendezvous(); - -private: - unsigned cores_waiting{NUM_CPU_CORES}; - std::mutex mutex; - std::condition_variable condition; - std::atomic<bool> end{}; -}; - -class Cpu { -public: - Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, - std::size_t core_index); - ~Cpu(); - - void RunLoop(bool tight_loop = true); - - void SingleStep(); - - void PrepareReschedule(); - - ARM_Interface& ArmInterface() { - return *arm_interface; - } - - const ARM_Interface& ArmInterface() const { - return *arm_interface; - } - - Kernel::Scheduler& Scheduler() { - return *scheduler; - } - - const Kernel::Scheduler& Scheduler() const { - return *scheduler; - } - - bool IsMainCore() const { - return core_index == 0; - } - - std::size_t CoreIndex() const { - return core_index; - } - - void Shutdown(); - - /** - * Creates an exclusive monitor to handle exclusive reads/writes. - * - * @param memory The current memory subsystem that the monitor may wish - * to keep track of. - * - * @param num_cores The number of cores to assume about the CPU. - * - * @returns The constructed exclusive monitor instance, or nullptr if the current - * CPU backend is unable to use an exclusive monitor. - */ - static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, - std::size_t num_cores); - -private: - void Reschedule(); - - std::unique_ptr<ARM_Interface> arm_interface; - CpuBarrier& cpu_barrier; - Kernel::GlobalScheduler& global_scheduler; - std::unique_ptr<Kernel::Scheduler> scheduler; - Timing::CoreTiming& core_timing; - - std::atomic<bool> reschedule_pending = false; - std::size_t core_index; -}; - -} // namespace Core diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp new file mode 100644 index 000000000..8eacf92dd --- /dev/null +++ b/src/core/core_manager.cpp @@ -0,0 +1,70 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <condition_variable> +#include <mutex> + +#include "common/logging/log.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/lock.h" +#include "core/settings.h" + +namespace Core { + +CoreManager::CoreManager(System& system, std::size_t core_index) + : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore( + core_index)}, + core_timing{system.CoreTiming()}, core_index{core_index} {} + +CoreManager::~CoreManager() = default; + +void CoreManager::RunLoop(bool tight_loop) { + Reschedule(); + + // If we don't have a currently active thread then don't execute instructions, + // instead advance to the next event and try to yield to the next thread + if (Kernel::GetCurrentThread() == nullptr) { + LOG_TRACE(Core, "Core-{} idling", core_index); + core_timing.Idle(); + } else { + if (tight_loop) { + physical_core.Run(); + } else { + physical_core.Step(); + } + } + core_timing.Advance(); + + Reschedule(); +} + +void CoreManager::SingleStep() { + return RunLoop(false); +} + +void CoreManager::PrepareReschedule() { + physical_core.Stop(); +} + +void CoreManager::Reschedule() { + // Lock the global kernel mutex when we manipulate the HLE state + std::lock_guard lock(HLE::g_hle_lock); + + global_scheduler.SelectThread(core_index); + + physical_core.Scheduler().TryDoContextSwitch(); +} + +} // namespace Core diff --git a/src/core/core_manager.h b/src/core/core_manager.h new file mode 100644 index 000000000..b14e723d7 --- /dev/null +++ b/src/core/core_manager.h @@ -0,0 +1,63 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <cstddef> +#include <memory> +#include "common/common_types.h" + +namespace Kernel { +class GlobalScheduler; +class PhysicalCore; +} // namespace Kernel + +namespace Core { +class System; +} + +namespace Core::Timing { +class CoreTiming; +} + +namespace Memory { +class Memory; +} + +namespace Core { + +constexpr unsigned NUM_CPU_CORES{4}; + +class CoreManager { +public: + CoreManager(System& system, std::size_t core_index); + ~CoreManager(); + + void RunLoop(bool tight_loop = true); + + void SingleStep(); + + void PrepareReschedule(); + + bool IsMainCore() const { + return core_index == 0; + } + + std::size_t CoreIndex() const { + return core_index; + } + +private: + void Reschedule(); + + Kernel::GlobalScheduler& global_scheduler; + Kernel::PhysicalCore& physical_core; + Timing::CoreTiming& core_timing; + + std::atomic<bool> reschedule_pending = false; + std::size_t core_index; +}; + +} // namespace Core diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp deleted file mode 100644 index f04a34133..000000000 --- a/src/core/cpu_core_manager.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "core/arm/exclusive_monitor.h" -#include "core/core.h" -#include "core/core_cpu.h" -#include "core/core_timing.h" -#include "core/cpu_core_manager.h" -#include "core/gdbstub/gdbstub.h" -#include "core/settings.h" - -namespace Core { -namespace { -void RunCpuCore(const System& system, Cpu& cpu_state) { - while (system.IsPoweredOn()) { - cpu_state.RunLoop(true); - } -} -} // Anonymous namespace - -CpuCoreManager::CpuCoreManager(System& system) : system{system} {} -CpuCoreManager::~CpuCoreManager() = default; - -void CpuCoreManager::Initialize() { - barrier = std::make_unique<CpuBarrier>(); - exclusive_monitor = Cpu::MakeExclusiveMonitor(system.Memory(), cores.size()); - - for (std::size_t index = 0; index < cores.size(); ++index) { - cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); - } -} - -void CpuCoreManager::StartThreads() { - // Create threads for CPU cores 1-3, and build thread_to_cpu map - // CPU core 0 is run on the main thread - thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); - if (!Settings::values.use_multi_core) { - return; - } - - for (std::size_t index = 0; index < core_threads.size(); ++index) { - core_threads[index] = std::make_unique<std::thread>(RunCpuCore, std::cref(system), - std::ref(*cores[index + 1])); - thread_to_cpu[core_threads[index]->get_id()] = cores[index + 1].get(); - } -} - -void CpuCoreManager::Shutdown() { - barrier->NotifyEnd(); - if (Settings::values.use_multi_core) { - for (auto& thread : core_threads) { - thread->join(); - thread.reset(); - } - } - - thread_to_cpu.clear(); - for (auto& cpu_core : cores) { - cpu_core->Shutdown(); - cpu_core.reset(); - } - - exclusive_monitor.reset(); - barrier.reset(); -} - -Cpu& CpuCoreManager::GetCore(std::size_t index) { - return *cores.at(index); -} - -const Cpu& CpuCoreManager::GetCore(std::size_t index) const { - return *cores.at(index); -} - -ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() { - return *exclusive_monitor; -} - -const ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() const { - return *exclusive_monitor; -} - -Cpu& CpuCoreManager::GetCurrentCore() { - if (Settings::values.use_multi_core) { - const auto& search = thread_to_cpu.find(std::this_thread::get_id()); - ASSERT(search != thread_to_cpu.end()); - ASSERT(search->second); - return *search->second; - } - - // Otherwise, use single-threaded mode active_core variable - return *cores[active_core]; -} - -const Cpu& CpuCoreManager::GetCurrentCore() const { - if (Settings::values.use_multi_core) { - const auto& search = thread_to_cpu.find(std::this_thread::get_id()); - ASSERT(search != thread_to_cpu.end()); - ASSERT(search->second); - return *search->second; - } - - // Otherwise, use single-threaded mode active_core variable - return *cores[active_core]; -} - -void CpuCoreManager::RunLoop(bool tight_loop) { - // Update thread_to_cpu in case Core 0 is run from a different host thread - thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); - - if (GDBStub::IsServerEnabled()) { - GDBStub::HandlePacket(); - - // If the loop is halted and we want to step, use a tiny (1) number of instructions to - // execute. Otherwise, get out of the loop function. - if (GDBStub::GetCpuHaltFlag()) { - if (GDBStub::GetCpuStepFlag()) { - tight_loop = false; - } else { - return; - } - } - } - - auto& core_timing = system.CoreTiming(); - core_timing.ResetRun(); - bool keep_running{}; - do { - keep_running = false; - for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - cores[active_core]->RunLoop(tight_loop); - } - keep_running |= core_timing.CanCurrentContextRun(); - } - } while (keep_running); - - if (GDBStub::IsServerEnabled()) { - GDBStub::SetCpuStepFlag(false); - } -} - -void CpuCoreManager::InvalidateAllInstructionCaches() { - for (auto& cpu : cores) { - cpu->ArmInterface().ClearInstructionCache(); - } -} - -} // namespace Core diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h deleted file mode 100644 index 2cbbf8216..000000000 --- a/src/core/cpu_core_manager.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <map> -#include <memory> -#include <thread> - -namespace Core { - -class Cpu; -class CpuBarrier; -class ExclusiveMonitor; -class System; - -class CpuCoreManager { -public: - explicit CpuCoreManager(System& system); - CpuCoreManager(const CpuCoreManager&) = delete; - CpuCoreManager(CpuCoreManager&&) = delete; - - ~CpuCoreManager(); - - CpuCoreManager& operator=(const CpuCoreManager&) = delete; - CpuCoreManager& operator=(CpuCoreManager&&) = delete; - - void Initialize(); - void StartThreads(); - void Shutdown(); - - Cpu& GetCore(std::size_t index); - const Cpu& GetCore(std::size_t index) const; - - Cpu& GetCurrentCore(); - const Cpu& GetCurrentCore() const; - - ExclusiveMonitor& GetExclusiveMonitor(); - const ExclusiveMonitor& GetExclusiveMonitor() const; - - void RunLoop(bool tight_loop); - - void InvalidateAllInstructionCaches(); - -private: - static constexpr std::size_t NUM_CPU_CORES = 4; - - std::unique_ptr<ExclusiveMonitor> exclusive_monitor; - std::unique_ptr<CpuBarrier> barrier; - std::array<std::unique_ptr<Cpu>, NUM_CPU_CORES> cores; - std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> core_threads; - std::size_t active_core{}; ///< Active core, only used in single thread mode - - /// Map of guest threads to CPU cores - std::map<std::thread::id, Cpu*> thread_to_cpu; - - System& system; -}; - -} // namespace Core diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp new file mode 100644 index 000000000..70ddbdcca --- /dev/null +++ b/src/core/cpu_manager.cpp @@ -0,0 +1,81 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/arm/exclusive_monitor.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/cpu_manager.h" +#include "core/gdbstub/gdbstub.h" + +namespace Core { + +CpuManager::CpuManager(System& system) : system{system} {} +CpuManager::~CpuManager() = default; + +void CpuManager::Initialize() { + for (std::size_t index = 0; index < core_managers.size(); ++index) { + core_managers[index] = std::make_unique<CoreManager>(system, index); + } +} + +void CpuManager::Shutdown() { + for (auto& cpu_core : core_managers) { + cpu_core.reset(); + } +} + +CoreManager& CpuManager::GetCoreManager(std::size_t index) { + return *core_managers.at(index); +} + +const CoreManager& CpuManager::GetCoreManager(std::size_t index) const { + return *core_managers.at(index); +} + +CoreManager& CpuManager::GetCurrentCoreManager() { + // Otherwise, use single-threaded mode active_core variable + return *core_managers[active_core]; +} + +const CoreManager& CpuManager::GetCurrentCoreManager() const { + // Otherwise, use single-threaded mode active_core variable + return *core_managers[active_core]; +} + +void CpuManager::RunLoop(bool tight_loop) { + if (GDBStub::IsServerEnabled()) { + GDBStub::HandlePacket(); + + // If the loop is halted and we want to step, use a tiny (1) number of instructions to + // execute. Otherwise, get out of the loop function. + if (GDBStub::GetCpuHaltFlag()) { + if (GDBStub::GetCpuStepFlag()) { + tight_loop = false; + } else { + return; + } + } + } + + auto& core_timing = system.CoreTiming(); + core_timing.ResetRun(); + bool keep_running{}; + do { + keep_running = false; + for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { + core_timing.SwitchContext(active_core); + if (core_timing.CanCurrentContextRun()) { + core_managers[active_core]->RunLoop(tight_loop); + } + keep_running |= core_timing.CanCurrentContextRun(); + } + } while (keep_running); + + if (GDBStub::IsServerEnabled()) { + GDBStub::SetCpuStepFlag(false); + } +} + +} // namespace Core diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h new file mode 100644 index 000000000..feb619e1b --- /dev/null +++ b/src/core/cpu_manager.h @@ -0,0 +1,50 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> + +namespace Core { + +class CoreManager; +class System; + +class CpuManager { +public: + explicit CpuManager(System& system); + CpuManager(const CpuManager&) = delete; + CpuManager(CpuManager&&) = delete; + + ~CpuManager(); + + CpuManager& operator=(const CpuManager&) = delete; + CpuManager& operator=(CpuManager&&) = delete; + + void Initialize(); + void Shutdown(); + + CoreManager& GetCoreManager(std::size_t index); + const CoreManager& GetCoreManager(std::size_t index) const; + + CoreManager& GetCurrentCoreManager(); + const CoreManager& GetCurrentCoreManager() const; + + std::size_t GetActiveCoreIndex() const { + return active_core; + } + + void RunLoop(bool tight_loop); + +private: + static constexpr std::size_t NUM_CPU_CORES = 4; + + std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers; + std::size_t active_core{}; ///< Active core, only used in single thread mode + + System& system; +}; + +} // namespace Core diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h index 7c11d7546..2b098b7c6 100644 --- a/src/core/frontend/input.h +++ b/src/core/frontend/input.h @@ -15,6 +15,13 @@ namespace Input { +enum class AnalogDirection : u8 { + RIGHT, + LEFT, + UP, + DOWN, +}; + /// An abstract class template for an input device (a button, an analog input, etc.). template <typename StatusType> class InputDevice { @@ -23,6 +30,9 @@ public: virtual StatusType GetStatus() const { return {}; } + virtual bool GetAnalogDirectionStatus(AnalogDirection direction) const { + return {}; + } }; /// An abstract class template for a factory that can create input devices. diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 37cb28848..67e95999d 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -35,7 +35,7 @@ #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index db189c8e3..2ea3dcb61 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/scheduler.h" diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1d0783bd3..edd4c4259 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -3,13 +3,15 @@ // Refer to the license.txt file included. #include <atomic> +#include <functional> #include <memory> #include <mutex> #include <utility> #include "common/assert.h" #include "common/logging/log.h" - +#include "core/arm/arm_interface.h" +#include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" @@ -17,6 +19,7 @@ #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" @@ -98,6 +101,7 @@ struct KernelCore::Impl { void Initialize(KernelCore& kernel) { Shutdown(); + InitializePhysicalCores(); InitializeSystemResourceLimit(kernel); InitializeThreads(); InitializePreemption(); @@ -121,6 +125,21 @@ struct KernelCore::Impl { global_scheduler.Shutdown(); named_ports.clear(); + + for (auto& core : cores) { + core.Shutdown(); + } + cores.clear(); + + exclusive_monitor.reset(); + } + + void InitializePhysicalCores() { + exclusive_monitor = + Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount()); + for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) { + cores.emplace_back(system, i, *exclusive_monitor); + } } // Creates the default system resource limit @@ -186,6 +205,9 @@ struct KernelCore::Impl { /// the ConnectToPort SVC. NamedPortTable named_ports; + std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; + std::vector<Kernel::PhysicalCore> cores; + // System context Core::System& system; }; @@ -240,6 +262,34 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } +Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { + return impl->cores[id]; +} + +const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { + return impl->cores[id]; +} + +Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() { + return *impl->exclusive_monitor; +} + +const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { + return *impl->exclusive_monitor; +} + +void KernelCore::InvalidateAllInstructionCaches() { + for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) { + PhysicalCore(i).ArmInterface().ClearInstructionCache(); + } +} + +void KernelCore::PrepareReschedule(std::size_t id) { + if (id < impl->global_scheduler.CpuCoresCount()) { + impl->cores[id].Stop(); + } +} + void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 3bf0068ed..fccffaf3a 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,8 +11,9 @@ #include "core/hle/kernel/object.h" namespace Core { +class ExclusiveMonitor; class System; -} +} // namespace Core namespace Core::Timing { class CoreTiming; @@ -25,6 +26,7 @@ class AddressArbiter; class ClientPort; class GlobalScheduler; class HandleTable; +class PhysicalCore; class Process; class ResourceLimit; class Thread; @@ -84,6 +86,21 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets the an instance of the respective physical CPU core. + Kernel::PhysicalCore& PhysicalCore(std::size_t id); + + /// Gets the an instance of the respective physical CPU core. + const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; + + /// Stops execution of 'id' core, in order to reschedule a new thread. + void PrepareReschedule(std::size_t id); + + Core::ExclusiveMonitor& GetExclusiveMonitor(); + + const Core::ExclusiveMonitor& GetExclusiveMonitor() const; + + void InvalidateAllInstructionCaches(); + /// Adds a port to the named port table void AddNamedPort(std::string name, std::shared_ptr<ClientPort> port); diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp new file mode 100644 index 000000000..9303dd273 --- /dev/null +++ b/src/core/hle/kernel/physical_core.cpp @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "core/arm/arm_interface.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" +#include "core/core.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, + Core::ExclusiveMonitor& exclusive_monitor) + : core_index{id} { +#ifdef ARCHITECTURE_x86_64 + arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); +#else + arm_interface = std::make_shared<Core::ARM_Unicorn>(system); + LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); +#endif + + scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); +} + +PhysicalCore::~PhysicalCore() = default; + +void PhysicalCore::Run() { + arm_interface->Run(); + arm_interface->ClearExclusiveState(); +} + +void PhysicalCore::Step() { + arm_interface->Step(); +} + +void PhysicalCore::Stop() { + arm_interface->PrepareReschedule(); +} + +void PhysicalCore::Shutdown() { + scheduler->Shutdown(); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h new file mode 100644 index 000000000..4c32c0f1b --- /dev/null +++ b/src/core/hle/kernel/physical_core.h @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <memory> + +namespace Kernel { +class Scheduler; +} // namespace Kernel + +namespace Core { +class ARM_Interface; +class ExclusiveMonitor; +class System; +} // namespace Core + +namespace Kernel { + +class PhysicalCore { +public: + PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor); + ~PhysicalCore(); + + PhysicalCore(const PhysicalCore&) = delete; + PhysicalCore& operator=(const PhysicalCore&) = delete; + + PhysicalCore(PhysicalCore&&) = default; + PhysicalCore& operator=(PhysicalCore&&) = default; + + /// Execute current jit state + void Run(); + /// Execute a single instruction in current jit. + void Step(); + /// Stop JIT execution/exit + void Stop(); + + // Shutdown this physical core. + void Shutdown(); + + Core::ARM_Interface& ArmInterface() { + return *arm_interface; + } + + const Core::ARM_Interface& ArmInterface() const { + return *arm_interface; + } + + bool IsMainCore() const { + return core_index == 0; + } + + bool IsSystemCore() const { + return core_index == 3; + } + + std::size_t CoreIndex() const { + return core_index; + } + + Kernel::Scheduler& Scheduler() { + return *scheduler; + } + + const Kernel::Scheduler& Scheduler() const { + return *scheduler; + } + +private: + std::size_t core_index; + std::unique_ptr<Core::ARM_Interface> arm_interface; + std::unique_ptr<Kernel::Scheduler> scheduler; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index d36fcd7d9..eb196a690 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -14,7 +14,6 @@ #include "common/logging/log.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/core_timing.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index dbcdb0b88..1d99bf7a2 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -15,7 +15,7 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index e84e5ce0d..e965b5b04 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -13,7 +13,6 @@ #include "common/thread_queue_list.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/kernel/errors.h" @@ -356,7 +355,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); + kernel.PrepareReschedule(processor_id); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 745f2c4e8..a0c806e8f 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -7,7 +7,6 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" @@ -96,7 +95,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { } if (resume) { thread->ResumeFromWait(); - Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); + kernel.PrepareReschedule(thread->GetProcessorID()); } } diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 4d952adc0..15c09f04c 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -250,6 +250,10 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { auto& rstick_entry = npad_pad_states[controller_idx].r_stick; const auto& button_state = buttons[controller_idx]; const auto& analog_state = sticks[controller_idx]; + const auto [stick_l_x_f, stick_l_y_f] = + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus(); + const auto [stick_r_x_f, stick_r_y_f] = + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus(); using namespace Settings::NativeButton; pad_state.a.Assign(button_state[A - BUTTON_HID_BEGIN]->GetStatus()); @@ -270,23 +274,32 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { pad_state.d_right.Assign(button_state[DRight - BUTTON_HID_BEGIN]->GetStatus()); pad_state.d_down.Assign(button_state[DDown - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_left.Assign(button_state[LStick_Left - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_up.Assign(button_state[LStick_Up - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_right.Assign(button_state[LStick_Right - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_down.Assign(button_state[LStick_Down - BUTTON_HID_BEGIN]->GetStatus()); - - pad_state.r_stick_left.Assign(button_state[RStick_Left - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_up.Assign(button_state[RStick_Up - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_right.Assign(button_state[RStick_Right - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_down.Assign(button_state[RStick_Down - BUTTON_HID_BEGIN]->GetStatus()); + pad_state.l_stick_right.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::RIGHT)); + pad_state.l_stick_left.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::LEFT)); + pad_state.l_stick_up.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::UP)); + pad_state.l_stick_down.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::DOWN)); + + pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); + pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT)); + pad_state.r_stick_right.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); + pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus()); pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus()); - const auto [stick_l_x_f, stick_l_y_f] = - analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus(); - const auto [stick_r_x_f, stick_r_y_f] = - analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus(); lstick_entry.x = static_cast<s32>(stick_l_x_f * HID_JOYSTICK_MAX); lstick_entry.y = static_cast<s32>(stick_l_y_f * HID_JOYSTICK_MAX); rstick_entry.x = static_cast<s32>(stick_r_x_f * HID_JOYSTICK_MAX); diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 9e028da89..c98c848cf 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -41,6 +41,7 @@ void Shutdown() { Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); motion_emu.reset(); sdl.reset(); + udp.reset(); } Keyboard* GetKeyboard() { diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index d2e9d278f..a2e0c0bd2 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -342,6 +342,22 @@ public: return std::make_tuple<float, float>(0.0f, 0.0f); } + bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { + const auto [x, y] = GetStatus(); + const float directional_deadzone = 0.4f; + switch (direction) { + case Input::AnalogDirection::RIGHT: + return x > directional_deadzone; + case Input::AnalogDirection::LEFT: + return x < -directional_deadzone; + case Input::AnalogDirection::UP: + return y > directional_deadzone; + case Input::AnalogDirection::DOWN: + return y < -directional_deadzone; + } + return false; + } + private: std::shared_ptr<SDLJoystick> joystick; const int axis_x; diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 5f5a9989c..2228571a6 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -14,7 +14,6 @@ #include "input_common/udp/client.h" #include "input_common/udp/protocol.h" -using boost::asio::ip::address_v4; using boost::asio::ip::udp; namespace InputCommon::CemuhookUDP { @@ -31,10 +30,10 @@ public: explicit Socket(const std::string& host, u16 port, u8 pad_index, u32 client_id, SocketCallback callback) - : client_id(client_id), timer(io_service), - send_endpoint(udp::endpoint(address_v4::from_string(host), port)), - socket(io_service, udp::endpoint(udp::v4(), 0)), pad_index(pad_index), - callback(std::move(callback)) {} + : callback(std::move(callback)), timer(io_service), + socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), + pad_index(pad_index), + send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} void Stop() { io_service.stop(); @@ -126,7 +125,7 @@ static void SocketLoop(Socket* socket) { Client::Client(std::shared_ptr<DeviceStatus> status, const std::string& host, u16 port, u8 pad_index, u32 client_id) - : status(status) { + : status(std::move(status)) { StartCommunication(host, port, pad_index, client_id); } @@ -207,7 +206,7 @@ void TestCommunication(const std::string& host, u16 port, u8 pad_index, u32 clie Common::Event success_event; SocketCallback callback{[](Response::Version version) {}, [](Response::PortInfo info) {}, [&](Response::PadData data) { success_event.Set(); }}; - Socket socket{host, port, pad_index, client_id, callback}; + Socket socket{host, port, pad_index, client_id, std::move(callback)}; std::thread worker_thread{SocketLoop, &socket}; bool result = success_event.WaitFor(std::chrono::seconds(8)); socket.Stop(); @@ -267,7 +266,7 @@ CalibrationConfigurationJob::CalibrationConfigurationJob( complete_event.Set(); } }}; - Socket socket{host, port, pad_index, client_id, callback}; + Socket socket{host, port, pad_index, client_id, std::move(callback)}; std::thread worker_thread{SocketLoop, &socket}; complete_event.Wait(); socket.Stop(); diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h index 0b21f4da6..b8c654755 100644 --- a/src/input_common/udp/client.h +++ b/src/input_common/udp/client.h @@ -11,7 +11,6 @@ #include <string> #include <thread> #include <tuple> -#include <vector> #include "common/common_types.h" #include "common/thread.h" #include "common/vector_math.h" diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h index 1b521860a..3ba4d1fc8 100644 --- a/src/input_common/udp/protocol.h +++ b/src/input_common/udp/protocol.h @@ -7,7 +7,6 @@ #include <array> #include <optional> #include <type_traits> -#include <vector> #include <boost/crc.hpp> #include "common/bit_field.h" #include "common/swap.h" diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index a80f38614..ca99cc22f 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp @@ -2,7 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/logging/log.h" +#include <mutex> +#include <tuple> + #include "common/param_package.h" #include "core/frontend/input.h" #include "core/settings.h" @@ -14,7 +16,7 @@ namespace InputCommon::CemuhookUDP { class UDPTouchDevice final : public Input::TouchDevice { public: explicit UDPTouchDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} - std::tuple<float, float, bool> GetStatus() const { + std::tuple<float, float, bool> GetStatus() const override { std::lock_guard guard(status->update_mutex); return status->touch_status; } @@ -26,7 +28,7 @@ private: class UDPMotionDevice final : public Input::MotionDevice { public: explicit UDPMotionDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} - std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const { + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override { std::lock_guard guard(status->update_mutex); return status->motion_status; } diff --git a/src/input_common/udp/udp.h b/src/input_common/udp/udp.h index ea3de60bb..4f83f0441 100644 --- a/src/input_common/udp/udp.h +++ b/src/input_common/udp/udp.h @@ -2,15 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <memory> -#include <unordered_map> -#include "input_common/main.h" -#include "input_common/udp/client.h" namespace InputCommon::CemuhookUDP { -class UDPTouchDevice; -class UDPMotionDevice; +class Client; class State { public: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8218b7cd2..db9332d00 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,6 +29,8 @@ add_library(video_core STATIC gpu_synch.h gpu_thread.cpp gpu_thread.h + guest_driver.cpp + guest_driver.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 44b8b8d22..d56a47710 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -106,6 +107,9 @@ public: virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const = 0; virtual u32 GetBoundBuffer() const = 0; + + virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; + virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 110406f2f..4b824aa4e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con return result; } +VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + +const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 4ef3e0613..eeb79c56f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -218,6 +218,10 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 58dfa8033..7cea146f0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b return result; } +VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + +const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ee79260fc..8808bbf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1306,6 +1306,10 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array<u32, 0x40000>; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6f98bd827..f443ec0fe 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -227,6 +227,28 @@ enum class AtomicOp : u64 { Exch = 8, }; +enum class GlobalAtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + SafeAdd = 10, +}; + +enum class GlobalAtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32_FTZ_RN = 3, + F16x2_FTZ_RN = 4, + S64 = 5, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -958,6 +980,12 @@ union Instruction { } stg; union { + BitField<52, 4, GlobalAtomicOp> operation; + BitField<49, 3, GlobalAtomicType> type; + BitField<28, 20, s64> offset; + } atom; + + union { BitField<52, 4, AtomicOp> operation; BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; @@ -1690,6 +1718,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1994,6 +2023,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp new file mode 100644 index 000000000..6adef459e --- /dev/null +++ b/src/video_core/guest_driver.cpp @@ -0,0 +1,36 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <limits> + +#include "video_core/guest_driver.h" + +namespace VideoCore { + +void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { + if (texture_handler_size_deduced) { + return; + } + const std::size_t size = bound_offsets.size(); + if (size < 2) { + return; + } + std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); + u32 min_val = std::numeric_limits<u32>::max(); + for (std::size_t i = 1; i < size; ++i) { + if (bound_offsets[i] == bound_offsets[i - 1]) { + continue; + } + const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; + min_val = std::min(min_val, new_min); + } + if (min_val > 2) { + return; + } + texture_handler_size_deduced = true; + texture_handler_size = min_texture_handler_size * min_val; +} + +} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h new file mode 100644 index 000000000..fc1917347 --- /dev/null +++ b/src/video_core/guest_driver.h @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace VideoCore { + +/** + * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect + * information necessary for impossible to avoid HLE methods like shader tracks as they are + * Entscheidungsproblems. + */ +class GuestDriverProfile { +public: + void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); + + u32 GetTextureHandlerSize() const { + return texture_handler_size; + } + + bool TextureHandlerSizeKnown() const { + return texture_handler_size_deduced; + } + +private: + // Minimum size of texture handler any driver can use. + static constexpr u32 min_texture_handler_size = 4; + // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily + // use 4 bytes instead. Thus, certain drivers may squish the size. + static constexpr u32 default_texture_handler_size = 8; + + u32 texture_handler_size = default_texture_handler_size; + bool texture_handler_size_deduced = false; +}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5b0eca9e2..c586cd6fe 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -78,5 +79,18 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, const DiskResourceLoadCallback& callback = {}) {} + + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. + GuestDriverProfile& AccessGuestDriverProfile() { + return guest_driver_profile; + } + + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. + const GuestDriverProfile& AccessGuestDriverProfile() const { + return guest_driver_profile; + } + +private: + GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c428f06e4..362942e09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -55,16 +55,20 @@ namespace { template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type) { + Tegra::Engines::ShaderType shader_type, + std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 offset = + entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(shader_type, entry.GetOffset()); + return engine.GetStageTexture(shader_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); + SetupTexture(binding++, texture, entry); + } + } } } @@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); + SetupTexture(binding++, texture, entry); + } + } } } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c5bdd377..489eb143c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s } void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + locker.SetBoundBuffer(usage.bound_buffer); for (const auto& key : usage.keys) { const auto [buffer, offset] = key.first; locker.InsertKey(buffer, offset, key.second); @@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), + return ShaderDiskCacheUsage{unique_identifier, variant, + locker.GetBoundBuffer(), locker.GetKeys(), locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6531dfe9b..4735000b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -391,6 +391,7 @@ public: DeclareVertex(); DeclareGeometry(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareInternalFlags(); @@ -503,6 +504,16 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); + } + if (num_custom_variables > 0) { + code.AddNewLine(); + } + } + void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { @@ -655,7 +666,8 @@ private: u32 binding = device.GetBaseBindings(stage).sampler; for (const auto& sampler : ir.GetSamplers()) { const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding++); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + binding += sampler.IsIndexed() ? sampler.Size() : 1; std::string sampler_type = [&]() { if (sampler.IsBuffer()) { @@ -682,7 +694,11 @@ private: sampler_type += "Shadow"; } - code.AddLine("{} {} {};", description, sampler_type, name); + if (!sampler.IsIndexed()) { + code.AddLine("{} {} {};", description, sampler_type, name); + } else { + code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); + } } if (!ir.GetSamplers().empty()) { code.AddNewLine(); @@ -775,6 +791,11 @@ private: return {GetRegister(index), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {GetCustomVariable(index), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { @@ -1098,7 +1119,11 @@ private: } else if (!meta->ptp.empty()) { expr += "Offsets"; } - expr += '(' + GetSampler(meta->sampler) + ", "; + if (!meta->sampler.IsIndexed()) { + expr += '(' + GetSampler(meta->sampler) + ", "; + } else { + expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; + } expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow && !separate_dc ? 1 : 0) - 1); expr += '('; @@ -1310,6 +1335,8 @@ private: const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), Type::Uint}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = {GetCustomVariable(cv->GetIndex()), Type::Float}; } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1857,10 +1884,7 @@ private: template <const std::string_view& opname, Type type> Expression Atomic(Operation operation) { - ASSERT(stage == ShaderType::Compute); - auto& smem = std::get<SmemNode>(*operation[0]); - - return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), + return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).As(type)), type}; } @@ -2240,6 +2264,10 @@ private: return GetDeclarationWithSuffix(index, "gpr"); } + std::string GetCustomVariable(u32 index) const { + return GetDeclarationWithSuffix(index, "custom_var"); + } + std::string GetPredicate(Tegra::Shader::Pred pred) const { return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index cf874a09a..1fc204f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,7 +53,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 11; +constexpr u32 NativeVersion = 12; // Making sure sizes doesn't change by accident static_assert(sizeof(ProgramVariant) == 20); @@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 num_bound_samplers{}; u32 num_bindless_samplers{}; if (file.ReadArray(&usage.unique_identifier, 1) != 1 || - file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || + file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { LOG_ERROR(Render_OpenGL, error_loading); @@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; if (!LoadObjectFromPrecompiled(usage.unique_identifier) || - !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(usage.variant) || + !LoadObjectFromPrecompiled(usage.bound_buffer) || + !LoadObjectFromPrecompiled(num_keys) || !LoadObjectFromPrecompiled(num_bound_samplers) || !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; @@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(usage.bound_buffer) != 1 || file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { @@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p }; if (!SaveObjectToPrecompiled(usage.unique_identifier) || - !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 69a2fbdda..ef2371f6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>); struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + u32 bound_buffer{}; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b53078721..24a658dce 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -353,6 +353,7 @@ private: DeclareFragment(); DeclareCompute(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareSharedMemory(); @@ -586,6 +587,15 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); + Name(id, fmt::format("custom_var_{}", i)); + custom_variables.emplace(i, AddGlobalVariable(id)); + } + } + void DeclarePredicates() { for (const auto pred : ir.GetPredicates()) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); @@ -982,6 +992,11 @@ private: return {OpLoad(t_float, registers.at(index)), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { return {Constant(t_uint, immediate->GetValue()), Type::Uint}; } @@ -1123,15 +1138,7 @@ private: } if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - - Id offset = OpISub(t_uint, real, base); - offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); - return {OpLoad(t_float, - OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), - Type::Float}; + return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; } if (const auto lmem = std::get_if<LmemNode>(&*node)) { @@ -1142,10 +1149,7 @@ private: } if (const auto smem = std::get_if<SmemNode>(&*node)) { - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); - return {OpLoad(t_uint, pointer), Type::Uint}; + return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1339,20 +1343,13 @@ private: target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; + target = {GetSharedMemoryPointer(*smem), Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), - Type::Float}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = {custom_variables.at(cv->GetIndex()), Type::Float}; } else { UNIMPLEMENTED(); @@ -1804,11 +1801,16 @@ private: return {}; } - Expression UAtomicAdd(Operation operation) { - const auto& smem = std::get<SmemNode>(*operation[0]); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); + Expression AtomicAdd(Operation operation) { + Id pointer; + if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { + pointer = GetSharedMemoryPointer(*smem); + } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { + pointer = GetGlobalMemoryPointer(*gmem); + } else { + UNREACHABLE(); + return {Constant(t_uint, 0), Type::Uint}; + } const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id semantics = Constant(t_uint, 0U); @@ -2243,6 +2245,22 @@ private: return {}; } + Id GetGlobalMemoryPointer(const GmemNode& gmem) { + const Id real = AsUint(Visit(gmem.GetRealAddress())); + const Id base = AsUint(Visit(gmem.GetBaseAddress())); + const Id diff = OpISub(t_uint, real, base); + const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + const Id buffer = global_buffers.at(gmem.GetDescriptor()); + return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); + } + + Id GetSharedMemoryPointer(const SmemNode& smem) { + ASSERT(stage == ShaderType::Compute); + Id address = AsUint(Visit(smem.GetAddress())); + address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); + return OpAccessChain(t_smem_uint, shared_memory, address); + } + static constexpr std::array operation_decompilers = { &SPIRVDecompiler::Assign, @@ -2389,7 +2407,7 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::UAtomicAdd, + &SPIRVDecompiler::AtomicAdd, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, @@ -2485,9 +2503,9 @@ private: Id t_smem_uint{}; - const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); + const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); + Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); const Id t_gmem_struct = MemberDecorate( Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); @@ -2508,6 +2526,7 @@ private: Id out_vertex{}; Id in_vertex{}; std::map<u32, Id> registers; + std::map<u32, Id> custom_variables; std::map<Tegra::Shader::Pred, Id> predicates; std::map<u32, Id> flow_variables; Id local_memory{}; diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index a2f0044ba..cca13bcde 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -65,8 +65,8 @@ public: void DetachSegment(ASTNode start, ASTNode end); void Remove(ASTNode node); - ASTNode first{}; - ASTNode last{}; + ASTNode first; + ASTNode last; }; class ASTProgram { @@ -299,9 +299,9 @@ private: friend class ASTZipper; ASTData data; - ASTNode parent{}; - ASTNode next{}; - ASTNode previous{}; + ASTNode parent; + ASTNode next; + ASTNode previous; ASTZipper* manager{}; }; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle return value; } +std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes bindless_samplers.insert_or_assign({buffer, offset}, sampler); } +void ConstBufferLocker::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + bool ConstBufferLocker::IsConsistent() const { if (!engine) { return false; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -10,6 +10,7 @@ #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" namespace VideoCommon::Shader { @@ -40,6 +41,8 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + std::optional<u32> ObtainBoundBuffer(); + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -49,6 +52,9 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Set the bound buffer for this locker. + void SetBoundBuffer(u32 buffer); + /// Checks keys and samplers against engine's current const buffers. Returns true if they are /// the same value, false otherwise; bool IsConsistent() const; @@ -71,12 +77,27 @@ public: return bindless_samplers; } + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. + VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { + if (engine) { + return &engine->AccessGuestDriverProfile(); + } + return nullptr; + } + private: const Tegra::Engines::ShaderType stage; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <cstring> +#include <limits> #include <set> #include <fmt/format.h> @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); + return; + } + if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + return; + } + u32 count{}; + std::vector<u32> bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + ++count; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + } +} + +std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, + VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return std::nullopt; + } + const u32 base_offset = sampler_to_deduce.GetOffset(); + u32 max_offset{std::numeric_limits<u32>::max()}; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + if (sampler.GetOffset() > base_offset) { + max_offset = std::min(sampler.GetOffset(), max_offset); + } + } + if (max_offset == std::numeric_limits<u32>::max()) { + return std::nullopt; + } + return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); +} + } // Anonymous namespace class ASTDecoder { @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } +void ShaderIR::PostDecode() { + // Deduce texture handler size if needed + auto gpu_driver = locker.AccessGuestDriverProfile(); + DeduceTextureHandlerSize(gpu_driver, used_samplers); + // Deduce Indexed Samplers + if (!uses_indexed_samplers) { + return; + } + for (auto& sampler : used_samplers) { + if (!sampler.IsIndexed()) { + continue; + } + if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { + sampler.SetSize(*size); + } else { + LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); + sampler.SetSize(1); + } + } +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 371fae127..e60875cc4 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod const Node one = Immediate(1); const Node two = Immediate(2); - Node value{}; + Node value; for (u32 i = 0; i < lop_iterations; ++i) { const Node shift_amount = Immediate(i); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7591a715f..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,9 +19,12 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::StoreType; namespace { @@ -61,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { } } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), + Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), + std::move(offset), Immediate(size)); +} + +Node Sign16Extend(Node value) { + Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); + return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -136,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); [[fallthrough]]; case OpCode::Id::LD_S: { - const auto GetMemory = [&](s32 offset) { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - immediate_offset); - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) - : GetLocalMemory(address); + return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); + }; + const auto GetMemory = [&](s32 offset) { + return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) + : GetLocalMemory(GetAddress(offset)); }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: - case Tegra::Shader::StoreType::Bits64: - case Tegra::Shader::StoreType::Bits128: { - const u32 count = [&]() { + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, + Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); + break; + case StoreType::Bits32: + case StoreType::Bits64: + case StoreType::Bits128: { + const u32 count = [&] { switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: return 1; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: return 2; - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: return 4; default: UNREACHABLE(); @@ -212,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { // To handle unaligned loads get the bytes used to dereference global memory and extract // those bytes from the loaded u32. if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), - std::move(offset), Immediate(size)); + gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); } SetTemporary(bb, i, gmem); @@ -269,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; - const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L - ? &ShaderIR::SetLocalMemory - : &ShaderIR::SetSharedMemory; + const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; + const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; + const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Signed16: { + Node address = GetAddress(0); + Node memory = (this->*get_memory)(address); + (this->*set_memory)( + bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); + break; + } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); @@ -323,18 +354,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node value = GetRegister(instr.gpr0.Value() + i); if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, - Immediate(size)); + const u32 mask = GetUnalignedMask(type); + value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::ATOM: { + UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", + static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", + static_cast<int>(instr.atom.type.Value())); + + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::ATOMS: { UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", static_cast<int>(instr.atoms.operation.Value())); @@ -348,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node memory = GetSharedMemory(std::move(address)); Node data = GetRegister(instr.gpr20); - Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); + Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..351c8c2f1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, + {}, {}, component, element, {}}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) + is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { @@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, + {}, {}, {}, element, index_var}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, - info.is_buffer); + info.is_buffer, false); } -const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, buffer, offset] = - TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_sampler != nullptr); - if (base_sampler == nullptr) { + const auto [base_node, tracked_sampler_info] = + TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); + ASSERT(base_node != nullptr); + if (base_node == nullptr) { return nullptr; } - const auto info = GetSamplerInfo(sampler_info, offset, buffer); + if (const auto bindless_sampler_info = + std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { + const u32 buffer = bindless_sampler_info->GetIndex(); + const u32 offset = bindless_sampler_info->GetOffset(); + const auto info = GetSamplerInfo(sampler_info, offset, buffer); + + // If this sampler has already been used, return the existing mapping. + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); + return &*it; + } - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { - return entry.GetBuffer() == buffer && entry.GetOffset() == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && - it->IsShadow() == info.is_shadow); - return &*it; - } + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer, false); + } else if (const auto array_sampler_info = + std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { + const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; + index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); + const auto info = GetSamplerInfo(sampler_info, base_offset); + + // If this sampler has already been used, return the existing mapping. + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), + [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && + it->IsBuffer() == info.is_buffer && it->IsIndexed()); + return &*it; + } - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + uses_indexed_samplers = true; + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, + info.is_shadow, info.is_buffer, true); + } + return nullptr; } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, "This method is not supported."); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - const Sampler* sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) + : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, + lod, {}, element, index_var}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -596,7 +630,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); } - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -632,7 +666,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); const SamplerInfo info{texture_type, is_array, depth_compare, false}; - const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { @@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, + index_var}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..a0a7b9111 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,7 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - UAtomicAdd, /// (smem, uint) -> uint + AtomicAdd, /// (memory, {u}int) -> {u}int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -212,6 +212,7 @@ enum class MetaStackClass { class OperationNode; class ConditionalNode; class GprNode; +class CustomVarNode; class ImmediateNode; class InternalFlagNode; class PredicateNode; @@ -223,26 +224,32 @@ class SmemNode; class GmemNode; class CommentNode; -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, +using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>; using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; +class BindlessSamplerNode; +class ArraySamplerNode; + +using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSampler = std::shared_ptr<TrackSamplerData>; + class Sampler { public: /// This constructor is for bound samplers constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_buffer{is_buffer} {} + is_buffer{is_buffer}, is_indexed{is_indexed} {} /// This constructor is for bindless samplers constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} + is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} constexpr u32 GetIndex() const { return index; @@ -276,16 +283,72 @@ public: return is_bindless; } + constexpr bool IsIndexed() const { + return is_indexed; + } + + constexpr u32 Size() const { + return size; + } + + constexpr void SetSize(u32 new_size) { + size = new_size; + } + private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + u32 size{}; ///< Size of the sampler if indexed. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class ArraySamplerNode final { +public: + explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) + : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetBaseOffset() const { + return base_offset; + } + + constexpr u32 GetIndexVar() const { + return bindless_var; + } + +private: + u32 index; + u32 base_offset; + u32 bindless_var; +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class BindlessSamplerNode final { +public: + explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { + return offset; + } + +private: + u32 index; + u32 offset; }; class Image final { @@ -380,8 +443,9 @@ struct MetaTexture { std::vector<Node> derivates; Node bias; Node lod; - Node component{}; + Node component; u32 element{}; + Node index; }; struct MetaImage { @@ -488,6 +552,19 @@ private: Tegra::Shader::Register index{}; }; +/// A custom variable +class CustomVarNode final { +public: + explicit constexpr CustomVarNode(u32 index) : index{index} {} + + constexpr u32 GetIndex() const { + return index; + } + +private: + u32 index{}; +}; + /// A 32-bits value that represents an immediate value class ImmediateNode final { public: diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); } +template <typename T, typename... Args> +TrackSampler MakeTrackSampler(Args&&... args) { + static_assert(std::is_convertible_v<T, TrackSamplerData>); + return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); +} + template <typename... Args> Node Operation(OperationCode code, Args&&... args) { if constexpr (sizeof...(args) == 0) { diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); + PostDecode(); } ShaderIR::~ShaderIR() = default; @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { return MakeNode<GprNode>(reg); } +Node ShaderIR::GetCustomVariable(u32 id) { + return MakeNode<CustomVarNode>(id); +} + Node ShaderIR::GetImmediate19(Instruction instr) { return Immediate(instr.alu.GetImm20_19()); } @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { return id; } +u32 ShaderIR::NewCustomVariable() { + return num_custom_variables++; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,6 +180,10 @@ public: return amend_code[index]; } + u32 GetNumCustomVariables() const { + return num_custom_variables; + } + private: friend class ASTDecoder; @@ -191,6 +195,7 @@ private: }; void Decode(); + void PostDecode(); NodeBlock DecodeRange(u32 begin, u32 end); void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); @@ -235,6 +240,8 @@ private: /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); + /// Generates a node for a custom variable + Node GetCustomVariable(u32 id); /// Generates a node representing a 19-bit immediate value Node GetImmediate19(Tegra::Shader::Instruction instr); /// Generates a node representing a 32-bit immediate value @@ -321,7 +328,7 @@ private: std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses a texture sampler for a bindless texture. - const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, + const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses an image. @@ -387,6 +394,9 @@ private: std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; + std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, @@ -399,6 +409,8 @@ private: /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); + u32 NewCustomVariable(); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -414,6 +426,7 @@ private: NodeBlock global_code; ASTManager program_manager{true, true}; std::vector<Node> amend_code; + u32 num_custom_variables{}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; @@ -431,6 +444,7 @@ private: bool uses_instance_id{}; bool uses_vertex_id{}; bool uses_warps{}; + bool uses_indexed_samplers{}; Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..face8c943 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "video_core/shader/node.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } return {}; } + +std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { + if (operation.GetCode() != OperationCode::UAdd) { + return std::nullopt; + } + Node gpr; + Node offset; + ASSERT(operation.GetOperandsCount() == 2); + for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { + Node operand = operation[i]; + if (std::holds_alternative<ImmediateNode>(*operand)) { + offset = operation[i]; + } else if (std::holds_alternative<GprNode>(*operand)) { + gpr = operation[i]; + } + } + if (offset && gpr) { + return std::make_pair(gpr, offset); + } + return std::nullopt; +} + +bool AmendNodeCv(std::size_t amend_index, Node node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { + operation->SetAmendIndex(amend_index); + return true; + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + conditional->SetAmendIndex(amend_index); + return true; + } + return false; +} + } // Anonymous namespace +std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { + // Constant buffer found, test if it's an immediate + const auto offset = cbuf->GetOffset(); + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + auto track = + MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); + return {tracked, track}; + } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + auto bound_buffer = locker.ObtainBoundBuffer(); + if (!bound_buffer) { + return {}; + } + if (*bound_buffer != cbuf->GetIndex()) { + return {}; + } + auto pair = DecoupleIndirectRead(*operation); + if (!pair) { + return {}; + } + auto [gpr, base_offset] = *pair; + const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); + auto gpu_driver = locker.AccessGuestDriverProfile(); + if (gpu_driver == nullptr) { + return {}; + } + const u32 bindless_cv = NewCustomVariable(); + const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, + Immediate(gpu_driver->GetTextureHandlerSize())); + + const Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); + const std::size_t amend_index = DeclareAmend(amend_op); + AmendNodeCv(amend_index, code[cursor]); + // TODO Implement Bindless Index custom variable + auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), + offset_inm->GetValue(), bindless_cv); + return {tracked, track}; + } + return {}; + } + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { + if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { + return {}; + } + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); + if (!source) { + return {}; + } + return TrackBindlessSampler(source, code, new_cursor); + } + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); + std::get<0>(found)) { + // Cbuf found in operand. + return found; + } + } + return {}; + } + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { + const auto& conditional_code = conditional->GetCode(); + return TrackBindlessSampler(tracked, conditional_code, + static_cast<s64>(conditional_code.size())); + } + return {}; +} + std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 67c9a7c6d..96dec50e2 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -236,6 +236,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i widget->setVisible(false); analog_map_stick = {ui->buttonLStickAnalog, ui->buttonRStickAnalog}; + analog_map_deadzone = {ui->sliderLStickDeadzone, ui->sliderRStickDeadzone}; + analog_map_deadzone_label = {ui->labelLStickDeadzone, ui->labelRStickDeadzone}; for (int button_id = 0; button_id < Settings::NativeButton::NumButtons; button_id++) { auto* const button = button_map[button_id]; @@ -326,6 +328,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i InputCommon::Polling::DeviceType::Analog); } }); + connect(analog_map_deadzone[analog_id], &QSlider::valueChanged, [=] { + const float deadzone = analog_map_deadzone[analog_id]->value() / 100.0f; + analog_map_deadzone_label[analog_id]->setText(tr("Deadzone: %1").arg(deadzone)); + analogs_param[analog_id].Set("deadzone", deadzone); + }); } connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); @@ -484,7 +491,7 @@ void ConfigureInputPlayer::ClearAll() { continue; } - analogs_param[analog_id].Erase(analog_sub_buttons[sub_button_id]); + analogs_param[analog_id].Clear(); } } @@ -508,6 +515,23 @@ void ConfigureInputPlayer::UpdateButtonLabels() { AnalogToText(analogs_param[analog_id], analog_sub_buttons[sub_button_id])); } analog_map_stick[analog_id]->setText(tr("Set Analog Stick")); + + auto& param = analogs_param[analog_id]; + auto* const analog_deadzone_slider = analog_map_deadzone[analog_id]; + auto* const analog_deadzone_label = analog_map_deadzone_label[analog_id]; + + if (param.Has("engine") && param.Get("engine", "") == "sdl") { + if (!param.Has("deadzone")) { + param.Set("deadzone", 0.1f); + } + + analog_deadzone_slider->setValue(static_cast<int>(param.Get("deadzone", 0.1f) * 100)); + analog_deadzone_slider->setVisible(true); + analog_deadzone_label->setVisible(true); + } else { + analog_deadzone_slider->setVisible(false); + analog_deadzone_label->setVisible(false); + } } } diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index c66027651..045704e47 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -97,6 +97,8 @@ private: /// Analog inputs are also represented each with a single button, used to configure with an /// actual analog stick std::array<QPushButton*, Settings::NativeAnalog::NumAnalogs> analog_map_stick; + std::array<QSlider*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone; + std::array<QLabel*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone_label; static const std::array<std::string, ANALOG_SUB_BUTTONS_NUM> analog_sub_buttons; diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui index 42db020be..1556481d0 100644 --- a/src/yuzu/configuration/configure_input_player.ui +++ b/src/yuzu/configuration/configure_input_player.ui @@ -170,6 +170,44 @@ </item> </layout> </item> + <item row="4" column="0" colspan="2"> + <layout class="QVBoxLayout" name="sliderRStickDeadzoneVerticalLayout"> + <item> + <layout class="QHBoxLayout" name="sliderRStickDeadzoneHorizontalLayout"> + <item> + <widget class="QLabel" name="labelRStickDeadzone"> + <property name="text"> + <string>Deadzone: 0</string> + </property> + <property name="alignment"> + <enum>Qt::AlignHCenter</enum> + </property> + </widget> + </item> + </layout> + </item> + <item> + <widget class="QSlider" name="sliderRStickDeadzone"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + </widget> + </item> + </layout> + </item> + <item row="5" column="0"> + <spacer name="RStick_verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + </spacer> + </item> </layout> </widget> </item> @@ -745,6 +783,47 @@ </item> </layout> </item> + <item row="5" column="1" colspan="2"> + <layout class="QVBoxLayout" name="sliderLStickDeadzoneVerticalLayout"> + <property name="sizeConstraint"> + <enum>QLayout::SetDefaultConstraint</enum> + </property> + <item> + <layout class="QHBoxLayout" name="sliderLStickDeadzoneHorizontalLayout"> + <item> + <widget class="QLabel" name="labelLStickDeadzone"> + <property name="text"> + <string>Deadzone: 0</string> + </property> + <property name="alignment"> + <enum>Qt::AlignHCenter</enum> + </property> + </widget> + </item> + </layout> + </item> + <item> + <widget class="QSlider" name="sliderLStickDeadzone"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + </widget> + </item> + </layout> + </item> + <item row="6" column="1"> + <spacer name="LStick_verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + </spacer> + </item> </layout> </widget> </item> |