diff options
Diffstat (limited to 'src')
46 files changed, 362 insertions, 387 deletions
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index 4ce2d374e..8ab5649df 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -37,7 +37,7 @@ Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callbac : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)}, sink_stream{sink_stream}, name{std::move(name_)} { - release_event = CoreTiming::RegisterEvent( + release_event = Core::Timing::RegisterEvent( name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); }); } @@ -57,7 +57,7 @@ Stream::State Stream::GetState() const { s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const { const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; - return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate); + return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate); } static void VolumeAdjustSamples(std::vector<s16>& samples) { @@ -99,7 +99,8 @@ void Stream::PlayNextBuffer() { sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); - CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {}); + Core::Timing::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, + {}); } void Stream::ReleaseActiveBuffer() { diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h index aebfeb51d..caa775544 100644 --- a/src/audio_core/stream.h +++ b/src/audio_core/stream.h @@ -13,7 +13,7 @@ #include "audio_core/buffer.h" #include "common/common_types.h" -namespace CoreTiming { +namespace Core::Timing { struct EventType; } @@ -91,16 +91,16 @@ private: /// Gets the number of core cycles when the specified buffer will be released s64 GetBufferReleaseCycles(const Buffer& buffer) const; - u32 sample_rate; ///< Sample rate of the stream - Format format; ///< Format of the stream - ReleaseCallback release_callback; ///< Buffer 
release callback for the stream - State state{State::Stopped}; ///< Playback state of the stream - CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream - BufferPtr active_buffer; ///< Actively playing buffer in the stream - std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream - std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream - SinkStream& sink_stream; ///< Output sink for the stream - std::string name; ///< Name of the stream, must be unique + u32 sample_rate; ///< Sample rate of the stream + Format format; ///< Format of the stream + ReleaseCallback release_callback; ///< Buffer release callback for the stream + State state{State::Stopped}; ///< Playback state of the stream + Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream + BufferPtr active_buffer; ///< Actively playing buffer in the stream + std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream + std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream + SinkStream& sink_stream; ///< Output sink for the stream + std::string name; ///< Name of the stream, must be unique }; using StreamPtr = std::shared_ptr<Stream>; diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index afbda8d8b..f28951f8a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -112,14 +112,14 @@ public: // Always execute at least one tick. 
amortized_ticks = std::max<u64>(amortized_ticks, 1); - CoreTiming::AddTicks(amortized_ticks); + Timing::AddTicks(amortized_ticks); num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { - return std::max(CoreTiming::GetDowncount(), 0); + return std::max(Timing::GetDowncount(), 0); } u64 GetCNTPCT() override { - return CoreTiming::GetTicks(); + return Timing::GetTicks(); } ARM_Dynarmic& parent; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index c455c81fb..c36c15c02 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -177,7 +177,7 @@ void ARM_Unicorn::Run() { if (GDBStub::IsServerEnabled()) { ExecuteInstructions(std::max(4000000, 0)); } else { - ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); + ExecuteInstructions(std::max(Timing::GetDowncount(), 0)); } } @@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64)); void ARM_Unicorn::ExecuteInstructions(int num_instructions) { MICROPROFILE_SCOPE(ARM_Jit_Unicorn); CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); - CoreTiming::AddTicks(num_instructions); + Timing::AddTicks(num_instructions); if (GDBStub::IsServerEnabled()) { if (last_bkpt_hit) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); diff --git a/src/core/core.cpp b/src/core/core.cpp index 1dd576c26..4d9d21ee4 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -94,7 +94,7 @@ struct System::Impl { ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) { LOG_DEBUG(HW_Memory, "initialized OK"); - CoreTiming::Init(); + Timing::Init(); kernel.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( @@ -205,7 +205,7 @@ struct System::Impl { // Shutdown kernel and core timing kernel.Shutdown(); - CoreTiming::Shutdown(); + Timing::Shutdown(); // Close app loader app_loader.reset(); @@ -232,7 +232,7 @@ struct System::Impl { } 
PerfStatsResults GetAndResetPerfStats() { - return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs()); + return perf_stats.GetAndResetStats(Timing::GetGlobalTimeUs()); } Kernel::KernelCore kernel; diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index fffda8a99..452366250 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -93,14 +93,14 @@ void Cpu::RunLoop(bool tight_loop) { if (IsMainCore()) { // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. - CoreTiming::Idle(); - CoreTiming::Advance(); + Timing::Idle(); + Timing::Advance(); } PrepareReschedule(); } else { if (IsMainCore()) { - CoreTiming::Advance(); + Timing::Advance(); } if (tight_loop) { diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 7953c8720..2b7ca9766 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -15,7 +15,7 @@ #include "common/threadsafe_queue.h" #include "core/core_timing_util.h" -namespace CoreTiming { +namespace Core::Timing { static s64 global_timer; static int slice_length; @@ -242,4 +242,4 @@ int GetDowncount() { return downcount; } -} // namespace CoreTiming +} // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 9ed757bd7..093989d4c 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -22,7 +22,7 @@ #include <string> #include "common/common_types.h" -namespace CoreTiming { +namespace Core::Timing { struct EventType; @@ -92,4 +92,4 @@ std::chrono::microseconds GetGlobalTimeUs(); int GetDowncount(); -} // namespace CoreTiming +} // namespace Core::Timing diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index 73dea4edb..88ff70233 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -8,7 +8,7 @@ #include <limits> #include "common/logging/log.h" -namespace CoreTiming { +namespace Core::Timing { constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE; @@ -60,4 +60,4 
@@ s64 nsToCycles(u64 ns) { return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; } -} // namespace CoreTiming +} // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index 5c3718782..513cfac1b 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -6,7 +6,7 @@ #include "common/common_types.h" -namespace CoreTiming { +namespace Core::Timing { // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz // The exact value used is of course unverified. @@ -61,4 +61,4 @@ inline u64 cyclesToMs(s64 cycles) { return cycles * 1000 / BASE_CLOCK_RATE; } -} // namespace CoreTiming +} // namespace Core::Timing diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 7a524ce5a..3721ae8fe 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -124,7 +124,7 @@ struct KernelCore::Impl { void InitializeThreads() { thread_wakeup_event_type = - CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); + Core::Timing::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); } std::atomic<u32> next_object_id{0}; @@ -137,7 +137,7 @@ struct KernelCore::Impl { SharedPtr<ResourceLimit> system_resource_limit; - CoreTiming::EventType* thread_wakeup_event_type = nullptr; + Core::Timing::EventType* thread_wakeup_event_type = nullptr; // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, // allowing us to simply use a pool index or similar. 
Kernel::HandleTable thread_wakeup_callback_handle_table; @@ -213,7 +213,7 @@ u64 KernelCore::CreateNewProcessID() { return impl->next_process_id++; } -CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const { +Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const { return impl->thread_wakeup_event_type; } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index c643a6401..7406f107e 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,7 +11,7 @@ template <typename T> class ResultVal; -namespace CoreTiming { +namespace Core::Timing { struct EventType; } @@ -89,7 +89,7 @@ private: u64 CreateNewThreadID(); /// Retrieves the event type used for thread wakeup callbacks. - CoreTiming::EventType* ThreadWakeupCallbackEventType() const; + Core::Timing::EventType* ThreadWakeupCallbackEventType() const; /// Provides a reference to the thread wakeup callback handle table. Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index df4d6cf0a..9e2517e1b 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -111,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) { void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = CoreTiming::GetTicks(); + const u64 most_recent_switch_ticks = Core::Timing::GetTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 7cfecb68c..5f040f79f 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -927,9 +927,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { const u64 
thread_ticks = current_thread->GetTotalCPUTimeTicks(); - out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks); + out_ticks = thread_ticks + (Core::Timing::GetTicks() - prev_ctx_ticks); } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { - out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks; + out_ticks = Core::Timing::GetTicks() - prev_ctx_ticks; } *result = out_ticks; @@ -1546,10 +1546,10 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to static u64 GetSystemTick() { LOG_TRACE(Kernel_SVC, "called"); - const u64 result{CoreTiming::GetTicks()}; + const u64 result{Core::Timing::GetTicks()}; // Advance time to defeat dumb games that busy-wait for the frame to end. - CoreTiming::AddTicks(400); + Core::Timing::AddTicks(400); return result; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index d3984dfc4..7881c2b90 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -43,7 +43,7 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread - CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); + Core::Timing::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; @@ -85,12 +85,13 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. 
- CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), - kernel.ThreadWakeupCallbackEventType(), callback_handle); + Core::Timing::ScheduleEventThreadsafe(Core::Timing::nsToCycles(nanoseconds), + kernel.ThreadWakeupCallbackEventType(), callback_handle); } void Thread::CancelWakeupTimer() { - CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle); + Core::Timing::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), + callback_handle); } static std::optional<s32> GetNextProcessorId(u64 mask) { @@ -197,7 +198,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name thread->stack_top = stack_top; thread->tpidr_el0 = 0; thread->nominal_priority = thread->current_priority = priority; - thread->last_running_ticks = CoreTiming::GetTicks(); + thread->last_running_ticks = Core::Timing::GetTicks(); thread->processor_id = processor_id; thread->ideal_core = processor_id; thread->affinity_mask = 1ULL << processor_id; @@ -257,7 +258,7 @@ void Thread::SetStatus(ThreadStatus new_status) { } if (status == ThreadStatus::Running) { - last_running_ticks = CoreTiming::GetTicks(); + last_running_ticks = Core::Timing::GetTicks(); } status = new_status; diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp index c22357d8c..b264c9503 100644 --- a/src/core/hle/service/hid/controllers/debug_pad.cpp +++ b/src/core/hle/service/hid/controllers/debug_pad.cpp @@ -22,7 +22,7 @@ void Controller_DebugPad::OnInit() {} void Controller_DebugPad::OnRelease() {} void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = CoreTiming::GetTicks(); + shared_memory.header.timestamp = Core::Timing::GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp index 
898572277..6d21f1a7d 100644 --- a/src/core/hle/service/hid/controllers/gesture.cpp +++ b/src/core/hle/service/hid/controllers/gesture.cpp @@ -18,7 +18,7 @@ void Controller_Gesture::OnInit() {} void Controller_Gesture::OnRelease() {} void Controller_Gesture::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = CoreTiming::GetTicks(); + shared_memory.header.timestamp = Core::Timing::GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp index ca75adc2b..798f30436 100644 --- a/src/core/hle/service/hid/controllers/keyboard.cpp +++ b/src/core/hle/service/hid/controllers/keyboard.cpp @@ -20,7 +20,7 @@ void Controller_Keyboard::OnInit() {} void Controller_Keyboard::OnRelease() {} void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = CoreTiming::GetTicks(); + shared_memory.header.timestamp = Core::Timing::GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp index 63391dbe9..4985037be 100644 --- a/src/core/hle/service/hid/controllers/mouse.cpp +++ b/src/core/hle/service/hid/controllers/mouse.cpp @@ -18,7 +18,7 @@ void Controller_Mouse::OnInit() {} void Controller_Mouse::OnRelease() {} void Controller_Mouse::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = CoreTiming::GetTicks(); + shared_memory.header.timestamp = Core::Timing::GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 04c8c35a8..ffdd1c593 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -308,7 +308,7 @@ void 
Controller_NPad::OnUpdate(u8* data, std::size_t data_len) { const auto& last_entry = main_controller->npad[main_controller->common.last_entry_index]; - main_controller->common.timestamp = CoreTiming::GetTicks(); + main_controller->common.timestamp = Core::Timing::GetTicks(); main_controller->common.last_entry_index = (main_controller->common.last_entry_index + 1) % 17; diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp index 02fcfadd9..cca4dca1d 100644 --- a/src/core/hle/service/hid/controllers/stubbed.cpp +++ b/src/core/hle/service/hid/controllers/stubbed.cpp @@ -22,7 +22,7 @@ void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) { } CommonHeader header{}; - header.timestamp = CoreTiming::GetTicks(); + header.timestamp = Core::Timing::GetTicks(); header.total_entry_count = 17; header.entry_count = 0; header.last_entry_index = 0; diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp index f666b1bd8..a7c8acc72 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.cpp +++ b/src/core/hle/service/hid/controllers/touchscreen.cpp @@ -21,7 +21,7 @@ void Controller_Touchscreen::OnInit() {} void Controller_Touchscreen::OnRelease() {} void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = CoreTiming::GetTicks(); + shared_memory.header.timestamp = Core::Timing::GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { @@ -48,7 +48,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; - const u64 tick = CoreTiming::GetTicks(); + const u64 tick = Core::Timing::GetTicks(); touch_entry.delta_time = tick - last_touch; last_touch = tick; 
touch_entry.finger = Settings::values.touchscreen.finger; diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp index cd397c70b..eff03d14e 100644 --- a/src/core/hle/service/hid/controllers/xpad.cpp +++ b/src/core/hle/service/hid/controllers/xpad.cpp @@ -19,7 +19,7 @@ void Controller_XPad::OnRelease() {} void Controller_XPad::OnUpdate(u8* data, std::size_t size) { for (auto& xpad_entry : shared_memory.shared_memory_entries) { - xpad_entry.header.timestamp = CoreTiming::GetTicks(); + xpad_entry.header.timestamp = Core::Timing::GetTicks(); xpad_entry.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 008bf3f02..79c320d04 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -36,9 +36,9 @@ namespace Service::HID { // Updating period for each HID device. // TODO(ogniK): Find actual polling rate of hid -constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66; -constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; -constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100; +constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66; +constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100; +constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100; constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") { @@ -73,14 +73,13 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") { GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000); // Register update callbacks - pad_update_event = - CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) { - UpdateControllers(userdata, cycles_late); - }); + pad_update_event = Core::Timing::RegisterEvent( 
+ "HID::UpdatePadCallback", + [this](u64 userdata, int cycles_late) { UpdateControllers(userdata, cycles_late); }); // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) - CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event); + Core::Timing::ScheduleEvent(pad_update_ticks, pad_update_event); ReloadInputDevices(); } @@ -94,7 +93,7 @@ void IAppletResource::DeactivateController(HidController controller) { } IAppletResource ::~IAppletResource() { - CoreTiming::UnscheduleEvent(pad_update_event, 0); + Core::Timing::UnscheduleEvent(pad_update_event, 0); } void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { @@ -114,7 +113,7 @@ void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) { controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE); } - CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); + Core::Timing::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); } class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index eca27c056..6d897c842 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -7,7 +7,7 @@ #include "controllers/controller_base.h" #include "core/hle/service/service.h" -namespace CoreTiming { +namespace Core::Timing { struct EventType; } @@ -66,7 +66,7 @@ private: Kernel::SharedPtr<Kernel::SharedMemory> shared_mem; - CoreTiming::EventType* pad_update_event; + Core::Timing::EventType* pad_update_event; std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)> controllers{}; diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index 3c7f8b1ee..b427d4068 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) { 
IPC::ResponseBuilder rb{ctx, 5}; rb.Push(RESULT_SUCCESS); - rb.PushRaw<u64>(CoreTiming::GetTicks()); + rb.PushRaw<u64>(Core::Timing::GetTicks()); rb.PushRaw<u32>(0); } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index d57a54ee8..88d80ba06 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -184,7 +184,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o IoctlGetGpuTime params{}; std::memcpy(&params, input.data(), input.size()); - params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks()); + params.gpu_time = Core::Timing::cyclesToNs(Core::Timing::GetTicks()); std::memcpy(output.data(), &params, output.size()); return 0; } diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index b171f256c..ab90d591e 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -13,10 +13,6 @@ #include "core/hle/kernel/object.h" #include "core/hle/kernel/writable_event.h" -namespace CoreTiming { -struct EventType; -} - namespace Service::NVFlinger { struct IGBPBuffer { diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index cde06916d..ce1b59860 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -25,21 +25,21 @@ namespace Service::NVFlinger { constexpr std::size_t SCREEN_REFRESH_RATE = 60; -constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); +constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); NVFlinger::NVFlinger() { // Schedule the screen composition events composition_event = - CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { + 
Core::Timing::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { Compose(); - CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event); + Core::Timing::ScheduleEvent(frame_ticks - cycles_late, composition_event); }); - CoreTiming::ScheduleEvent(frame_ticks, composition_event); + Core::Timing::ScheduleEvent(frame_ticks, composition_event); } NVFlinger::~NVFlinger() { - CoreTiming::UnscheduleEvent(composition_event, 0); + Core::Timing::UnscheduleEvent(composition_event, 0); } void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 4c55e99f4..6d8bcbd30 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -14,7 +14,7 @@ #include "common/common_types.h" #include "core/hle/kernel/object.h" -namespace CoreTiming { +namespace Core::Timing { struct EventType; } @@ -115,8 +115,8 @@ private: /// layers. u32 next_buffer_queue_id = 1; - /// CoreTiming event that handles screen composition. - CoreTiming::EventType* composition_event; + /// Event that handles screen composition. 
+ Core::Timing::EventType* composition_event; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index c13640ad8..efebd1b24 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -106,8 +106,8 @@ private: void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); - SteadyClockTimePoint steady_clock_time_point{ - CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000}; + const SteadyClockTimePoint steady_clock_time_point{ + Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000}; IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; rb.Push(RESULT_SUCCESS); rb.PushRaw(steady_clock_time_point); @@ -282,7 +282,7 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { } const SteadyClockTimePoint steady_clock_time_point{ - CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}}; + Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000, {}}; CalendarTime calendar_time{}; calendar_time.year = tm->tm_year + 1900; diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 2242c14cf..77607a755 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -31,10 +31,10 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) { class ScopeInit final { public: ScopeInit() { - CoreTiming::Init(); + Core::Timing::Init(); } ~ScopeInit() { - CoreTiming::Shutdown(); + Core::Timing::Shutdown(); } }; @@ -44,37 +44,37 @@ static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0, expected_callback = CB_IDS[idx]; lateness = expected_lateness; - CoreTiming::AddTicks(CoreTiming::GetDowncount() - - cpu_downcount); // Pretend we executed X cycles of instructions. - CoreTiming::Advance(); + // Pretend we executed X cycles of instructions. 
+ Core::Timing::AddTicks(Core::Timing::GetDowncount() - cpu_downcount); + Core::Timing::Advance(); REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); - REQUIRE(downcount == CoreTiming::GetDowncount()); + REQUIRE(downcount == Core::Timing::GetDowncount()); } TEST_CASE("CoreTiming[BasicOrder]", "[core]") { ScopeInit guard; - CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); - CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); - CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); - CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); - CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); + Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>); + Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>); // Enter slice 0 - CoreTiming::Advance(); + Core::Timing::Advance(); // D -> B -> C -> A -> E - CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); - REQUIRE(1000 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]); - REQUIRE(500 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]); - REQUIRE(500 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]); - REQUIRE(100 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]); - REQUIRE(100 == CoreTiming::GetDowncount()); + Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]); + REQUIRE(1000 == Core::Timing::GetDowncount()); + 
Core::Timing::ScheduleEvent(500, cb_b, CB_IDS[1]); + REQUIRE(500 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEvent(800, cb_c, CB_IDS[2]); + REQUIRE(500 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEvent(100, cb_d, CB_IDS[3]); + REQUIRE(100 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEvent(1200, cb_e, CB_IDS[4]); + REQUIRE(100 == Core::Timing::GetDowncount()); AdvanceAndCheck(3, 400); AdvanceAndCheck(1, 300); @@ -86,36 +86,36 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") { TEST_CASE("CoreTiming[Threadsave]", "[core]") { ScopeInit guard; - CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); - CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); - CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); - CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>); - CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>); + Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>); + Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>); // Enter slice 0 - CoreTiming::Advance(); + Core::Timing::Advance(); // D -> B -> C -> A -> E - CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); + Core::Timing::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); // Manually force since ScheduleEventThreadsafe doesn't call it - CoreTiming::ForceExceptionCheck(1000); - REQUIRE(1000 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); + 
Core::Timing::ForceExceptionCheck(1000); + REQUIRE(1000 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); // Manually force since ScheduleEventThreadsafe doesn't call it - CoreTiming::ForceExceptionCheck(500); - REQUIRE(500 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); + Core::Timing::ForceExceptionCheck(500); + REQUIRE(500 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); // Manually force since ScheduleEventThreadsafe doesn't call it - CoreTiming::ForceExceptionCheck(800); - REQUIRE(500 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); + Core::Timing::ForceExceptionCheck(800); + REQUIRE(500 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); // Manually force since ScheduleEventThreadsafe doesn't call it - CoreTiming::ForceExceptionCheck(100); - REQUIRE(100 == CoreTiming::GetDowncount()); - CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); + Core::Timing::ForceExceptionCheck(100); + REQUIRE(100 == Core::Timing::GetDowncount()); + Core::Timing::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); // Manually force since ScheduleEventThreadsafe doesn't call it - CoreTiming::ForceExceptionCheck(1200); - REQUIRE(100 == CoreTiming::GetDowncount()); + Core::Timing::ForceExceptionCheck(1200); + REQUIRE(100 == Core::Timing::GetDowncount()); AdvanceAndCheck(3, 400); AdvanceAndCheck(1, 300); @@ -143,42 +143,42 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") { ScopeInit guard; - CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>); - CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>); - CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>); - CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>); - 
CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>); + Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", FifoCallback<0>); + Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", FifoCallback<1>); + Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", FifoCallback<2>); + Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", FifoCallback<3>); + Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", FifoCallback<4>); - CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]); - CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]); - CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]); - CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]); - CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]); + Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]); + Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]); + Core::Timing::ScheduleEvent(1000, cb_c, CB_IDS[2]); + Core::Timing::ScheduleEvent(1000, cb_d, CB_IDS[3]); + Core::Timing::ScheduleEvent(1000, cb_e, CB_IDS[4]); // Enter slice 0 - CoreTiming::Advance(); - REQUIRE(1000 == CoreTiming::GetDowncount()); + Core::Timing::Advance(); + REQUIRE(1000 == Core::Timing::GetDowncount()); callbacks_ran_flags = 0; counter = 0; lateness = 0; - CoreTiming::AddTicks(CoreTiming::GetDowncount()); - CoreTiming::Advance(); - REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); + Core::Timing::AddTicks(Core::Timing::GetDowncount()); + Core::Timing::Advance(); + REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount()); REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); } -TEST_CASE("CoreTiming[PredictableLateness]", "[core]") { +TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { ScopeInit guard; - CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); - CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_a = 
Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); // Enter slice 0 - CoreTiming::Advance(); + Core::Timing::Advance(); - CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]); - CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]); + Core::Timing::ScheduleEvent(100, cb_a, CB_IDS[0]); + Core::Timing::ScheduleEvent(200, cb_b, CB_IDS[1]); AdvanceAndCheck(0, 90, 10, -10); // (100 - 10) AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); @@ -192,9 +192,10 @@ static void RescheduleCallback(u64 userdata, s64 cycles_late) { REQUIRE(reschedules >= 0); REQUIRE(lateness == cycles_late); - if (reschedules > 0) - CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata), - userdata); + if (reschedules > 0) { + Core::Timing::ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata), + userdata); + } } } // namespace ChainSchedulingTest @@ -203,35 +204,35 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { ScopeInit guard; - CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>); - CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>); - CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>); - CoreTiming::EventType* cb_rs = - CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback); + Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_rs = + Core::Timing::RegisterEvent("callbackReschedule", RescheduleCallback); // Enter slice 0 - CoreTiming::Advance(); + Core::Timing::Advance(); - CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]); - CoreTiming::ScheduleEvent(1000, 
cb_b, CB_IDS[1]); - CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]); - CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); - REQUIRE(800 == CoreTiming::GetDowncount()); + Core::Timing::ScheduleEvent(800, cb_a, CB_IDS[0]); + Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]); + Core::Timing::ScheduleEvent(2200, cb_c, CB_IDS[2]); + Core::Timing::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); + REQUIRE(800 == Core::Timing::GetDowncount()); reschedules = 3; AdvanceAndCheck(0, 200); // cb_a AdvanceAndCheck(1, 1000); // cb_b, cb_rs REQUIRE(2 == reschedules); - CoreTiming::AddTicks(CoreTiming::GetDowncount()); - CoreTiming::Advance(); // cb_rs + Core::Timing::AddTicks(Core::Timing::GetDowncount()); + Core::Timing::Advance(); // cb_rs REQUIRE(1 == reschedules); - REQUIRE(200 == CoreTiming::GetDowncount()); + REQUIRE(200 == Core::Timing::GetDowncount()); AdvanceAndCheck(2, 800); // cb_c - CoreTiming::AddTicks(CoreTiming::GetDowncount()); - CoreTiming::Advance(); // cb_rs + Core::Timing::AddTicks(Core::Timing::GetDowncount()); + Core::Timing::Advance(); // cb_rs REQUIRE(0 == reschedules); - REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount()); + REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount()); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 33e507e69..1db0d031d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -5,12 +5,12 @@ add_library(video_core STATIC debug_utils/debug_utils.h engines/fermi_2d.cpp engines/fermi_2d.h + engines/kepler_compute.cpp + engines/kepler_compute.h engines/kepler_memory.cpp engines/kepler_memory.h engines/maxwell_3d.cpp engines/maxwell_3d.h - engines/maxwell_compute.cpp - engines/maxwell_compute.h engines/maxwell_dma.cpp engines/maxwell_dma.h engines/shader_bytecode.h diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp new file mode 100644 index 000000000..4ca856b6b --- /dev/null +++ 
b/src/video_core/engines/kepler_compute.cpp @@ -0,0 +1,34 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Engines { + +KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} + +KeplerCompute::~KeplerCompute() = default; + +void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { + ASSERT_MSG(method_call.method < Regs::NUM_REGS, + "Invalid KeplerCompute register, increase the size of the Regs structure"); + + regs.reg_array[method_call.method] = method_call.argument; + + switch (method_call.method) { + case KEPLER_COMPUTE_REG_INDEX(launch): + // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA + // kernels) + UNREACHABLE_MSG("Compute shaders are not implemented"); + break; + default: + break; + } +} + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h index 1d71f11bd..df0a32e0f 100644 --- a/src/video_core/engines/maxwell_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,47 +10,48 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" namespace Tegra::Engines { -#define MAXWELL_COMPUTE_REG_INDEX(field_name) \ - (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) +#define KEPLER_COMPUTE_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class MaxwellCompute final { +class KeplerCompute final { public: - MaxwellCompute() = default; - ~MaxwellCompute() = default; + explicit KeplerCompute(MemoryManager& memory_manager); + ~KeplerCompute(); + + static constexpr std::size_t 
NumConstBuffers = 8; struct Regs { static constexpr std::size_t NUM_REGS = 0xCF8; union { struct { - INSERT_PADDING_WORDS(0x281); + INSERT_PADDING_WORDS(0xAF); - union { - u32 compute_end; - BitField<0, 1, u32> unknown; - } compute; + u32 launch; - INSERT_PADDING_WORDS(0xA76); + INSERT_PADDING_WORDS(0xC48); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; - static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), - "MaxwellCompute Regs has wrong size"); + "KeplerCompute Regs has wrong size"); + + MemoryManager& memory_manager; /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ + static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(compute, 0x281); +ASSERT_REG_POSITION(launch, 0xAF); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 10eae6a65..19b6b14b2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() { LongQueryResult query_result{}; query_result.value = result; // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = CoreTiming::GetTicks(); + query_result.timestamp = Core::Timing::GetTicks(); Memory::WriteBlock(*address, &query_result, sizeof(query_result)); } dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp deleted file mode 100644 index 656db6a61..000000000 --- a/src/video_core/engines/maxwell_compute.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/logging/log.h" -#include "core/core.h" -#include "video_core/engines/maxwell_compute.h" - -namespace Tegra::Engines { - -void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) { - ASSERT_MSG(method_call.method < Regs::NUM_REGS, - "Invalid MaxwellCompute register, increase the size of the Regs structure"); - - regs.reg_array[method_call.method] = method_call.argument; - - switch (method_call.method) { - case MAXWELL_COMPUTE_REG_INDEX(compute): { - LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented"); - UNREACHABLE(); - break; - } - default: - break; - } -} - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 269df9437..1f425f90b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -186,7 +186,7 @@ enum class SubOp : u64 { }; enum class F2iRoundingOp : u64 { - None = 0, + RoundEven = 0, Floor = 1, Ceil = 2, Trunc = 3, diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 07d4ad9f7..3d00c308b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,9 +6,9 @@ #include "core/core_timing.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/maxwell_compute.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" @@ -32,7 +32,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); - maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); + kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); 
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); } @@ -246,8 +246,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { case EngineID::MAXWELL_B: maxwell_3d->CallMethod(method_call); break; - case EngineID::MAXWELL_COMPUTE_B: - maxwell_compute->CallMethod(method_call); + case EngineID::KEPLER_COMPUTE_B: + kepler_compute->CallMethod(method_call); break; case EngineID::MAXWELL_DMA_COPY_A: maxwell_dma->CallMethod(method_call); @@ -283,7 +283,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = CoreTiming::GetTicks(); + block.timestamp = Core::Timing::GetTicks(); Memory::WriteBlock(*address, &block, sizeof(block)); } else { const auto address = diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 19b62dc7e..a482196ea 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -103,15 +103,15 @@ struct FramebufferConfig { namespace Engines { class Fermi2D; class Maxwell3D; -class MaxwellCompute; class MaxwellDMA; +class KeplerCompute; class KeplerMemory; } // namespace Engines enum class EngineID { FERMI_TWOD_A = 0x902D, // 2D Engine MAXWELL_B = 0xB197, // 3D Engine - MAXWELL_COMPUTE_B = 0xB1C0, + KEPLER_COMPUTE_B = 0xB1C0, KEPLER_INLINE_TO_MEMORY_B = 0xA140, MAXWELL_DMA_COPY_A = 0xB0B5, }; @@ -209,7 +209,7 @@ private: /// 2D engine std::unique_ptr<Engines::Fermi2D> fermi_2d; /// Compute engine - std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; + std::unique_ptr<Engines::KeplerCompute> kepler_compute; /// DMA engine std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 70e124dc4..b39bb4843 100644 --- 
a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -719,45 +719,51 @@ private: constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - const auto count = static_cast<u32>(operation.GetOperandsCount()); ASSERT(meta); + const auto count = static_cast<u32>(operation.GetOperandsCount()); + const bool has_array = meta->sampler.IsArray(); + const bool has_shadow = meta->sampler.IsShadow(); + std::string expr = func; expr += '('; expr += GetSampler(meta->sampler); expr += ", "; - expr += coord_constructors[meta->coords_count - 1]; + expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); expr += '('; for (u32 i = 0; i < count; ++i) { - const bool is_extra = i >= meta->coords_count; - const bool is_array = i == meta->array_index; - - std::string operand = [&]() { - if (is_extra && is_extra_int) { - if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) { - return std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - return "ftoi(" + Visit(operation[i]) + ')'; - } - } else { - return Visit(operation[i]); - } - }(); - if (is_array) { - ASSERT(!is_extra); - operand = "float(ftoi(" + operand + "))"; - } - - expr += operand; + expr += Visit(operation[i]); - if (i + 1 == meta->coords_count) { - expr += ')'; - } - if (i + 1 < count) { + const u32 next = i + 1; + if (next < count || has_array || has_shadow) + expr += ", "; + } + if (has_array) { + expr += "float(ftoi(" + Visit(meta->array) + "))"; + } + if (has_shadow) { + if (has_array) expr += ", "; + expr += Visit(meta->depth_compare); + } + expr += ')'; + + for (const Node extra : meta->extras) { + expr += ", "; + if (is_extra_int) { + if (const auto immediate = std::get_if<ImmediateNode>(extra)) { + // Inline the string as an immediate integer in GLSL (some extra arguments are + // required 
to be constant) + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(extra) + ')'; + } + } else { + expr += Visit(extra); } } + expr += ')'; return expr; } @@ -1134,7 +1140,7 @@ private: Type::HalfFloat); } - std::string F4Texture(Operation operation) { + std::string Texture(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1145,7 +1151,7 @@ private: return expr + GetSwizzle(meta->element); } - std::string F4TextureLod(Operation operation) { + std::string TextureLod(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1156,7 +1162,7 @@ private: return expr + GetSwizzle(meta->element); } - std::string F4TextureGather(Operation operation) { + std::string TextureGather(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1164,7 +1170,7 @@ private: GetSwizzle(meta->element); } - std::string F4TextureQueryDimensions(Operation operation) { + std::string TextureQueryDimensions(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1184,7 +1190,7 @@ private: return "0"; } - std::string F4TextureQueryLod(Operation operation) { + std::string TextureQueryLod(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1195,29 +1201,32 @@ private: return "0"; } - std::string F4TexelFetch(Operation operation) { + std::string TexelFetch(Operation operation) { constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - const auto count = static_cast<u32>(operation.GetOperandsCount()); ASSERT(meta); + UNIMPLEMENTED_IF(meta->sampler.IsArray()); + UNIMPLEMENTED_IF(!meta->extras.empty()); + + const auto count = static_cast<u32>(operation.GetOperandsCount()); std::string expr = 
"texelFetch("; expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors[meta->coords_count - 1]; + expr += constructors.at(count - 1); expr += '('; for (u32 i = 0; i < count; ++i) { expr += VisitOperand(operation, i, Type::Int); - if (i + 1 == meta->coords_count) { + const u32 next = i + 1; + if (next == count) expr += ')'; - } - if (i + 1 < count) { + if (next < count) expr += ", "; - } } expr += ')'; + return expr + GetSwizzle(meta->element); } @@ -1454,12 +1463,12 @@ private: &GLSLDecompiler::Logical2HNotEqual, &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::F4Texture, - &GLSLDecompiler::F4TextureLod, - &GLSLDecompiler::F4TextureGather, - &GLSLDecompiler::F4TextureQueryDimensions, - &GLSLDecompiler::F4TextureQueryLod, - &GLSLDecompiler::F4TexelFetch, + &GLSLDecompiler::Texture, + &GLSLDecompiler::TextureLod, + &GLSLDecompiler::TextureGather, + &GLSLDecompiler::TextureQueryDimensions, + &GLSLDecompiler::TextureQueryLod, + &GLSLDecompiler::TexelFetch, &GLSLDecompiler::Branch, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6476a9e1a..cca2ed708 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { - Core::System::GetInstance().GetPerfStats().EndSystemFrame(); + system.GetPerfStats().EndSystemFrame(); // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); @@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers( render_window.PollEvents(); - Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); - Core::System::GetInstance().GetPerfStats().BeginSystemFrame(); + 
system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); // Restore the rasterizer state prev_state.Apply(); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 38bb692d6..9fd4b273e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod SetRegister(bb, dest, value); } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index a992f73f8..55a6fbbf2 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = [&]() { switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::None: - return value; + case Tegra::Shader::F2iRoundingOp::RoundEven: + return Operation(OperationCode::FRoundEven, PRECISE, value); case Tegra::Shader::F2iRoundingOp::Floor: return Operation(OperationCode::FFloor, PRECISE, value); case Tegra::Shader::F2iRoundingOp::Ceil: @@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e006f8138..523421794 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -306,7 +306,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case OpCode::Id::TLD4S: { UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); } @@ -315,9 +314,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node op_a = GetRegister(instr.gpr8); const Node op_b = GetRegister(instr.gpr20); - std::vector<Node> coords; - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + std::vector<Node> coords; if (depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const Node op_y = GetRegister(instr.gpr8.Value() + 1); @@ -328,18 +326,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { coords.push_back(op_a); coords.push_back(op_b); } - const auto num_coords = static_cast<u32>(coords.size()); - coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); + std::vector<Node> extras; + extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); const auto& sampler = GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, num_coords}; - values[element] = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + auto coords_copy = coords; + MetaTexture meta{sampler, {}, {}, extras, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } WriteTexsInstructionFloat(bb, instr, values); @@ -360,12 +357,13 @@ u32 
ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { for (u32 element = 0; element < 4; ++element) { - if (instr.txq.IsComponentEnabled(element)) { - MetaTexture meta{sampler, element}; - const Node value = Operation(OperationCode::F4TextureQueryDimensions, - std::move(meta), GetRegister(instr.gpr8)); - SetTemporal(bb, indexer++, value); + if (!instr.txq.IsComponentEnabled(element)) { + continue; } + MetaTexture meta{sampler, {}, {}, {}, element}; + const Node value = + Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); @@ -412,9 +410,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { for (u32 element = 0; element < 2; ++element) { auto params = coords; - MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; - const Node value = - Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); + MetaTexture meta{sampler, {}, {}, {}, element}; + const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporal(bb, element, value); } for (u32 element = 0; element < 2; ++element) { @@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, } Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - std::size_t array_offset, std::size_t bias_offset, - std::vector<Node>&& coords) { - UNIMPLEMENTED_IF_MSG( - (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || - (texture_type == TextureType::TextureCube && is_array && depth_compare), - "This method is not supported."); + TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset) { + const bool is_array = 
array; + const bool is_shadow = depth_compare; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || + (texture_type == TextureType::TextureCube && is_array && is_shadow), + "This method is not supported."); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, // LOD selection (either via bias or explicit textureLod) not supported in GL for // sampler2DArrayShadow and samplerCubeArrayShadow. const bool gl_lod_supported = - !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || - (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); + !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); const OperationCode read_method = - lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; + lod_needed && gl_lod_supported ? 
OperationCode::TextureLod : OperationCode::Texture; UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); - std::optional<u32> array_offset_value; - if (is_array) - array_offset_value = static_cast<u32>(array_offset); - - const auto coords_count = static_cast<u32>(coords.size()); - + std::vector<Node> extras; if (process_mode != TextureProcessMode::None && gl_lod_supported) { if (process_mode == TextureProcessMode::LZ) { - coords.push_back(Immediate(0.0f)); + extras.push_back(Immediate(0.0f)); } else { // If present, lod or bias are always stored in the register indexed by the gpr20 // field with an offset depending on the usage of the other registers - coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); + extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); } } Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, coords_count, array_offset_value}; - values[element] = Operation(read_method, std::move(meta), std::move(params)); + auto copy_coords = coords; + MetaTexture meta{sampler, array, depth_compare, extras, element}; + values[element] = Operation(read_method, meta, std::move(copy_coords)); } return values; @@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, for (std::size_t i = 0; i < coord_count; ++i) { coords.push_back(GetRegister(coord_register + i)); } - // 1D.DC in opengl the 2nd component is ignored. + // 1D.DC in OpenGL the 2nd component is ignored. if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { coords.push_back(Immediate(0.0f)); } - std::size_t array_offset{}; - if (is_array) { - array_offset = coords.size(); - coords.push_back(GetRegister(array_register)); - } + + const Node array = is_array ? 
GetRegister(array_register) : nullptr; + + Node dc{}; if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 - // or in the next register if lod or bias are used + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - coords.push_back(GetRegister(depth_register)); - } - // Fill ignored coordinates - while (coords.size() < total_coord_count) { - coords.push_back(Immediate(0)); + dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, - 0, std::move(coords)); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1; + const u32 bias_offset = coord_count > 2 ? 1 : 0; std::vector<Node> coords; for (std::size_t i = 0; i < coord_count; ++i) { @@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); } - std::size_t array_offset{}; - if (is_array) { - array_offset = coords.size(); - coords.push_back(GetRegister(array_register)); - } + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 - // or in the next register if lod or bias are used + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 
1 : 0); - coords.push_back(GetRegister(depth_register)); - } - // Fill ignored coordinates - while (coords.size() < total_coord_count) { - coords.push_back(Immediate(0)); + dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, - (coord_count > 2 ? 1 : 0), std::move(coords)); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, @@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const u64 coord_register = array_register + (is_array ? 1 : 0); std::vector<Node> coords; - - for (size_t i = 0; i < coord_count; ++i) { + for (size_t i = 0; i < coord_count; ++i) coords.push_back(GetRegister(coord_register + i)); - } - std::optional<u32> array_offset; - if (is_array) { - array_offset = static_cast<u32>(coords.size()); - coords.push_back(GetRegister(array_register)); - } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; - values[element] = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + auto coords_copy = coords; + MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } return values; @@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { const std::size_t type_coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = type_coord_count + (is_array ? 
1 : 0); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; // If enabled arrays index is always stored in the gpr8 field @@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is : coord_register + 1; std::vector<Node> coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); } - std::optional<u32> array_offset; - if (is_array) { - array_offset = static_cast<u32>(coords.size()); - coords.push_back(GetRegister(array_register)); - } - const auto coords_count = static_cast<u32>(coords.size()); - if (lod_enabled) { - // When lod is used always is in grp20 - coords.push_back(GetRegister(instr.gpr20)); - } else { - coords.push_back(Immediate(0)); - } + const Node array = is_array ? GetRegister(array_register) : nullptr; + // When lod is used always is in gpr20 + const Node lod = lod_enabled ? 
GetRegister(instr.gpr20) : Immediate(0); const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, coords_count, array_offset}; - values[element] = - Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); + auto coords_copy = coords; + MetaTexture meta{sampler, array, {}, {lod}, element}; + values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1d4fbef53..52c7f2c4e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -156,12 +156,12 @@ enum class OperationCode { Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 + Texture, /// (MetaTexture, float[N] coords) -> float4 + TextureLod, /// (MetaTexture, float[N] coords) -> float4 + TextureGather, /// (MetaTexture, float[N] coords) -> float4 + TextureQueryDimensions, /// (MetaTexture, float a) -> float4 + TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 + TexelFetch, /// (MetaTexture, int[N], int) -> float4 Branch, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void @@ -288,9 +288,10 @@ struct MetaHalfArithmetic { struct MetaTexture { const Sampler& sampler; + Node array{}; + Node depth_compare{}; + std::vector<Node> 
extras; u32 element{}; - u32 coords_count{}; - std::optional<u32> array_index; }; constexpr MetaArithmetic PRECISE = {true}; @@ -754,9 +755,8 @@ private: bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, std::size_t array_offset, std::size_t bias_offset, - std::vector<Node>&& coords); + Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); |