diff options
Diffstat (limited to 'src')
31 files changed, 131 insertions, 357 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 965c28787..f61bcd40d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -140,8 +140,6 @@ add_library(core STATIC hle/kernel/svc_wrap.h hle/kernel/thread.cpp hle/kernel/thread.h - hle/kernel/timer.cpp - hle/kernel/timer.h hle/kernel/vm_manager.cpp hle/kernel/vm_manager.h hle/kernel/wait_object.cpp diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 67674cd47..7a524ce5a 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -18,7 +18,6 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/thread.h" -#include "core/hle/kernel/timer.h" #include "core/hle/lock.h" #include "core/hle/result.h" @@ -86,27 +85,12 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_ } } -/// The timer callback event, called when a timer is fired -static void TimerCallback(u64 timer_handle, int cycles_late) { - const auto proper_handle = static_cast<Handle>(timer_handle); - const auto& system = Core::System::GetInstance(); - SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle); - - if (timer == nullptr) { - LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle); - return; - } - - timer->Signal(cycles_late); -} - struct KernelCore::Impl { void Initialize(KernelCore& kernel) { Shutdown(); InitializeSystemResourceLimit(kernel); InitializeThreads(); - InitializeTimers(); } void Shutdown() { @@ -122,9 +106,6 @@ struct KernelCore::Impl { thread_wakeup_callback_handle_table.Clear(); thread_wakeup_event_type = nullptr; - timer_callback_handle_table.Clear(); - timer_callback_event_type = nullptr; - named_ports.clear(); } @@ -146,11 +127,6 @@ struct KernelCore::Impl { CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); } - void InitializeTimers() { - timer_callback_handle_table.Clear(); - 
timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback); - } - std::atomic<u32> next_object_id{0}; std::atomic<u64> next_process_id{Process::ProcessIDMin}; std::atomic<u64> next_thread_id{1}; @@ -161,12 +137,6 @@ struct KernelCore::Impl { SharedPtr<ResourceLimit> system_resource_limit; - /// The event type of the generic timer callback event - CoreTiming::EventType* timer_callback_event_type = nullptr; - // TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future, - // allowing us to simply use a pool index or similar. - Kernel::HandleTable timer_callback_handle_table; - CoreTiming::EventType* thread_wakeup_event_type = nullptr; // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, // allowing us to simply use a pool index or similar. @@ -198,10 +168,6 @@ SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle); } -SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const { - return impl->timer_callback_handle_table.Get<Timer>(handle); -} - void KernelCore::AppendNewProcess(SharedPtr<Process> process) { impl->process_list.push_back(std::move(process)); } @@ -247,18 +213,10 @@ u64 KernelCore::CreateNewProcessID() { return impl->next_process_id++; } -ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) { - return impl->timer_callback_handle_table.Create(timer); -} - CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const { return impl->thread_wakeup_event_type; } -CoreTiming::EventType* KernelCore::TimerCallbackEventType() const { - return impl->timer_callback_event_type; -} - Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() { return impl->thread_wakeup_callback_handle_table; } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 58c9d108b..c643a6401 100644 
--- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -22,7 +22,6 @@ class HandleTable; class Process; class ResourceLimit; class Thread; -class Timer; /// Represents a single instance of the kernel. class KernelCore { @@ -51,9 +50,6 @@ public: /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const; - /// Retrieves a shared pointer to a Timer instance within the timer callback handle table. - SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const; - /// Adds the given shared pointer to an internal list of active processes. void AppendNewProcess(SharedPtr<Process> process); @@ -82,7 +78,6 @@ private: friend class Object; friend class Process; friend class Thread; - friend class Timer; /// Creates a new object ID, incrementing the internal object ID counter. u32 CreateNewObjectID(); @@ -93,15 +88,9 @@ private: /// Creates a new thread ID, incrementing the internal thread ID counter. u64 CreateNewThreadID(); - /// Creates a timer callback handle for the given timer. - ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer); - /// Retrieves the event type used for thread wakeup callbacks. CoreTiming::EventType* ThreadWakeupCallbackEventType() const; - /// Retrieves the event type used for timer callbacks. - CoreTiming::EventType* TimerCallbackEventType() const; - /// Provides a reference to the thread wakeup callback handle table. 
Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp index 806078638..8870463d0 100644 --- a/src/core/hle/kernel/object.cpp +++ b/src/core/hle/kernel/object.cpp @@ -16,7 +16,6 @@ bool Object::IsWaitable() const { case HandleType::ReadableEvent: case HandleType::Thread: case HandleType::Process: - case HandleType::Timer: case HandleType::ServerPort: case HandleType::ServerSession: return true; diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index 1541b6e3c..4c2505908 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h @@ -25,7 +25,6 @@ enum class HandleType : u32 { Thread, Process, AddressArbiter, - Timer, ResourceLimit, ClientPort, ServerPort, diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp deleted file mode 100644 index 3afe60469..000000000 --- a/src/core/hle/kernel/timer.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/logging/log.h" -#include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" -#include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/object.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/kernel/timer.h" - -namespace Kernel { - -Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {} -Timer::~Timer() = default; - -SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) { - SharedPtr<Timer> timer(new Timer(kernel)); - - timer->reset_type = reset_type; - timer->signaled = false; - timer->name = std::move(name); - timer->initial_delay = 0; - timer->interval_delay = 0; - timer->callback_handle = kernel.CreateTimerCallbackHandle(timer).Unwrap(); - - return timer; -} - -bool Timer::ShouldWait(Thread* thread) const { - return !signaled; -} - -void Timer::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); - - if (reset_type == ResetType::OneShot) - signaled = false; -} - -void Timer::Set(s64 initial, s64 interval) { - // Ensure we get rid of any previous scheduled event - Cancel(); - - initial_delay = initial; - interval_delay = interval; - - if (initial == 0) { - // Immediately invoke the callback - Signal(0); - } else { - CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(), - callback_handle); - } -} - -void Timer::Cancel() { - CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle); -} - -void Timer::Clear() { - signaled = false; -} - -void Timer::Signal(int cycles_late) { - LOG_TRACE(Kernel, "Timer {} fired", GetObjectId()); - - signaled = true; - - // Resume all waiting threads - WakeupAllWaitingThreads(); - - if (interval_delay != 0) { - // Reschedule the timer with the interval delay - CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(interval_delay) - cycles_late, - kernel.TimerCallbackEventType(), 
callback_handle); - } -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/timer.h b/src/core/hle/kernel/timer.h deleted file mode 100644 index ce3e74426..000000000 --- a/src/core/hle/kernel/timer.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" -#include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" - -namespace Kernel { - -class KernelCore; - -class Timer final : public WaitObject { -public: - /** - * Creates a timer - * @param kernel The kernel instance to create the timer callback handle for. - * @param reset_type ResetType describing how to create the timer - * @param name Optional name of timer - * @return The created Timer - */ - static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type, - std::string name = "Unknown"); - - std::string GetTypeName() const override { - return "Timer"; - } - std::string GetName() const override { - return name; - } - - static const HandleType HANDLE_TYPE = HandleType::Timer; - HandleType GetHandleType() const override { - return HANDLE_TYPE; - } - - ResetType GetResetType() const { - return reset_type; - } - - u64 GetInitialDelay() const { - return initial_delay; - } - - u64 GetIntervalDelay() const { - return interval_delay; - } - - bool ShouldWait(Thread* thread) const override; - void Acquire(Thread* thread) override; - - /** - * Starts the timer, with the specified initial delay and interval. - * @param initial Delay until the timer is first fired - * @param interval Delay until the timer is fired after the first time - */ - void Set(s64 initial, s64 interval); - - void Cancel(); - void Clear(); - - /** - * Signals the timer, waking up any waiting threads and rescheduling it - * for the next interval. 
- * This method should not be called from outside the timer callback handler, - * lest multiple callback events get scheduled. - */ - void Signal(int cycles_late); - -private: - explicit Timer(KernelCore& kernel); - ~Timer() override; - - ResetType reset_type; ///< The ResetType of this timer - - u64 initial_delay; ///< The delay until the timer fires for the first time - u64 interval_delay; ///< The delay until the timer fires after the first time - - bool signaled; ///< Whether the timer has been signaled or not - std::string name; ///< Name of timer (optional) - - /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. - Handle callback_handle; -}; - -} // namespace Kernel diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index a850cadc8..11eba4a12 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -5,7 +5,6 @@ #include <chrono> #include <cstring> #include <memory> -#include <optional> #include <vector> #include <opus.h> @@ -30,48 +29,66 @@ public: u32 channel_count) : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)), sample_rate(sample_rate), channel_count(channel_count) { + // clang-format off static const FunctionInfo functions[] = { - {0, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, + {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"}, {1, nullptr, "SetContext"}, - {2, nullptr, "DecodeInterleavedForMultiStream"}, + {2, nullptr, "DecodeInterleavedForMultiStreamOld"}, {3, nullptr, "SetContextForMultiStream"}, - {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance, - "DecodeInterleavedWithPerformance"}, - {5, nullptr, "Unknown5"}, - {6, nullptr, "Unknown6"}, - {7, nullptr, "Unknown7"}, + {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, + {5, nullptr, 
"DecodeInterleavedForMultiStreamWithPerfOld"}, + {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, + {7, nullptr, "DecodeInterleavedForMultiStream"}, }; + // clang-format on + RegisterHandlers(functions); } private: - void DecodeInterleaved(Kernel::HLERequestContext& ctx) { + /// Describes extra behavior that may be asked of the decoding context. + enum class ExtraBehavior { + /// No extra behavior. + None, + + /// Resets the decoder context back to a freshly initialized state. + ResetContext, + }; + + void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Audio, "called"); - u32 consumed = 0; - u32 sample_count = 0; - std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); - if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples)) { - LOG_ERROR(Audio, "Failed to decode opus data"); - IPC::ResponseBuilder rb{ctx, 2}; - // TODO(ogniK): Use correct error code - rb.Push(ResultCode(-1)); - return; - } - IPC::ResponseBuilder rb{ctx, 4}; - rb.Push(RESULT_SUCCESS); - rb.Push<u32>(consumed); - rb.Push<u32>(sample_count); - ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); + DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); } - void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) { + void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Audio, "called"); + + u64 performance = 0; + DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None); + } + + void DecodeInterleaved(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Audio, "called"); + IPC::RequestParser rp{ctx}; + const auto extra_behavior = + rp.Pop<bool>() ? 
ExtraBehavior::ResetContext : ExtraBehavior::None; + + u64 performance = 0; + DecodeInterleavedHelper(ctx, &performance, extra_behavior); + } + + void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, + ExtraBehavior extra_behavior) { u32 consumed = 0; u32 sample_count = 0; - u64 performance = 0; std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); + + if (extra_behavior == ExtraBehavior::ResetContext) { + ResetDecoderContext(); + } + if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { LOG_ERROR(Audio, "Failed to decode opus data"); @@ -80,25 +97,28 @@ private: rb.Push(ResultCode(-1)); return; } - IPC::ResponseBuilder rb{ctx, 6}; + + const u32 param_size = performance != nullptr ? 6 : 4; + IPC::ResponseBuilder rb{ctx, param_size}; rb.Push(RESULT_SUCCESS); rb.Push<u32>(consumed); rb.Push<u32>(sample_count); - rb.Push<u64>(performance); + if (performance) { + rb.Push<u64>(*performance); + } ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); } - bool Decoder_DecodeInterleaved( - u32& consumed, u32& sample_count, const std::vector<u8>& input, - std::vector<opus_int16>& output, - std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) { + bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, + std::vector<opus_int16>& output, u64* out_performance_time) { const auto start_time = std::chrono::high_resolution_clock::now(); - std::size_t raw_output_sz = output.size() * sizeof(opus_int16); + const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); if (sizeof(OpusHeader) > input.size()) { LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", sizeof(OpusHeader), input.size()); return false; } + OpusHeader hdr{}; std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { @@ -106,8 +126,9 @@ private: 
sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); return false; } - auto frame = input.data() + sizeof(OpusHeader); - auto decoded_sample_count = opus_packet_get_nb_samples( + + const auto frame = input.data() + sizeof(OpusHeader); + const auto decoded_sample_count = opus_packet_get_nb_samples( frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), static_cast<opus_int32>(sample_rate)); if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { @@ -117,8 +138,9 @@ private: decoded_sample_count * channel_count * sizeof(u16), raw_output_sz); return false; } + const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); - auto out_sample_count = + const auto out_sample_count = opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); if (out_sample_count < 0) { LOG_ERROR(Audio, @@ -127,16 +149,24 @@ private: out_sample_count, frame_size, static_cast<u32>(hdr.sz)); return false; } + const auto end_time = std::chrono::high_resolution_clock::now() - start_time; sample_count = out_sample_count; consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); - if (performance_time.has_value()) { - performance_time->get() = + if (out_performance_time != nullptr) { + *out_performance_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); } + return true; } + void ResetDecoderContext() { + ASSERT(decoder != nullptr); + + opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); + } + struct OpusHeader { u32_be sz; // Needs to be BE for some odd reason INSERT_PADDING_WORDS(1); @@ -157,6 +187,7 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto sample_rate = rp.Pop<u32>(); const auto channel_count = rp.Pop<u32>(); + LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count); ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 || @@ -174,9 +205,10 @@ void 
HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - auto sample_rate = rp.Pop<u32>(); - auto channel_count = rp.Pop<u32>(); - auto buffer_sz = rp.Pop<u32>(); + const auto sample_rate = rp.Pop<u32>(); + const auto channel_count = rp.Pop<u32>(); + const auto buffer_sz = rp.Pop<u32>(); + LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate, channel_count, buffer_sz); @@ -185,8 +217,9 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { "Invalid sample rate"); ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); - std::size_t worker_sz = WorkerBufferSize(channel_count); + const std::size_t worker_sz = WorkerBufferSize(channel_count); ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); + std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ static_cast<OpusDecoder*>(operator new(worker_sz))}; if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5e778c6bc..8e915e2ae 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1252,11 +1252,19 @@ union Instruction { union { BitField<20, 14, u64> offset; BitField<34, 5, u64> index; + + u64 GetOffset() const { + return offset * 4; + } } cbuf34; union { BitField<20, 16, s64> offset; BitField<36, 5, u64> index; + + s64 GetOffset() const { + return offset; + } } cbuf36; // Unsure about the size of this one. 
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index ff5310848..4c08bb148 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,11 +49,6 @@ public: return false; } - /// Attempt to use a faster method to fill a region - virtual bool AccelerateFill(const void* config) { - return false; - } - /// Attempt to use a faster method to display the framebuffer to screen virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ee313cb2f..9f7c837d6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -477,9 +477,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { cached_pages.add({pages_interval, delta}); } -void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb, - bool using_depth_fb, bool preserve_contents, - std::optional<std::size_t> single_color_target) { +std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( + OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, + std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; @@ -491,7 +491,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). 
This is done because the guest registers may not change but the // host framebuffer may contain different attachments - return; + return current_depth_stencil_usage; } current_framebuffer_config_state = fb_config_state; @@ -561,12 +561,14 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us depth_surface->MarkAsModified(true, res_cache); fbkey.zeta = depth_surface->Texture().handle; - fbkey.stencil_enable = regs.stencil_enable; + fbkey.stencil_enable = regs.stencil_enable && + depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } SetupCachedFramebuffer(fbkey, current_state); - SyncViewport(current_state); + + return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; } void RasterizerOpenGL::Clear() { @@ -634,8 +636,8 @@ void RasterizerOpenGL::Clear() { return; } - ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false, - regs.clear_buffers.RT.Value()); + const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( + clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); if (regs.clear_flags.scissor) { SyncScissorTest(clear_state); } @@ -650,11 +652,11 @@ void RasterizerOpenGL::Clear() { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } - if (use_depth && use_stencil) { + if (clear_depth && clear_stencil) { glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); - } else if (use_depth) { + } else if (clear_depth) { glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); - } else if (use_stencil) { + } else if (clear_stencil) { glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); } } @@ -781,11 +783,6 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs return true; } -bool RasterizerOpenGL::AccelerateFill(const void* config) { - UNREACHABLE(); - return true; -} - bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 
u32 pixel_stride) { if (!framebuffer_addr) { @@ -957,7 +954,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader } } else { // Buffer is accessed directly, upload just what we use - size = used_buffer.GetSize() * sizeof(float); + size = used_buffer.GetSize(); } // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a103692f9..7f2bf0f8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -56,7 +56,6 @@ public: void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; - bool AccelerateFill(const void* config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; @@ -122,10 +121,12 @@ private: * @param using_depth_fb If true, configure the depth/stencil framebuffer. * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. * @param single_color_target Specifies if a single color buffer target should be used. + * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture + * (requires using_depth_fb to be true) */ - void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true, - bool using_depth_fb = true, bool preserve_contents = true, - std::optional<std::size_t> single_color_target = {}); + std::pair<bool, bool> ConfigureFramebuffers( + OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, + bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); /// Configures the current constbuffers to use for the draw command. 
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, @@ -214,6 +215,7 @@ private: std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; FramebufferConfigState current_framebuffer_config_state; + std::pair<bool, bool> current_depth_stencil_usage{}; std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2b9c4628f..50286432d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -734,7 +734,6 @@ void CachedSurface::FlushGLBuffer() { glPixelStorei(GL_PACK_ROW_LENGTH, 0); ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, params.height); - ASSERT(params.type != SurfaceType::Fill); const u8* const texture_src_data = Memory::GetPointer(params.addr); ASSERT(texture_src_data); if (params.is_tiled) { @@ -904,9 +903,6 @@ void CachedSurface::EnsureTextureView() { MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { - if (params.type == SurfaceType::Fill) - return; - MICROPROFILE_SCOPE(OpenGL_TextureUL); for (u32 i = 0; i < params.max_mip_level; i++) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 004245431..36035d0d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -543,8 +543,9 @@ private: if (const auto immediate = std::get_if<ImmediateNode>(offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); - return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / 4, - offset_imm % 4); + ASSERT_MSG(offset_imm % 4 == 0, "Unaligned 
cbuf direct access"); + return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), + offset_imm / (4 * 4), (offset_imm / 4) % 4); } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index e7847f614..51b8d55d4 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index a237dcb92..37eef2bf2 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -35,7 +35,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 p switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HMUL2_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); case OpCode::Id::HADD2_R: case OpCode::Id::HMUL2_R: return GetRegister(instr.gpr20); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 4a8cc1a1c..cc9a76a19 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -26,7 +26,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3 } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, 
instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index ee18d3a99..728a393a1 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -48,7 +48,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); const bool input_signed = instr.conversion.is_input_signed; @@ -72,7 +72,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); @@ -110,7 +110,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index be8dc2230..52f39d3ff 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -27,14 +27,14 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; } case OpCode::Id::FFMA_RR: return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::FFMA_RC: { return {GetRegister(instr.gpr39), - 
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; } case OpCode::Id::FFMA_IMM: return {GetImmediate19(instr), GetRegister(instr.gpr39)}; diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index ba846f1bd..9f9da2278 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index e88b04d18..dd3aef6f2 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u3 } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 4a6b945f9..43a0a9e10 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -39,13 +39,14 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { neg_b = instr.hfma2.negate_b; neg_c = instr.hfma2.negate_c; return {instr.hfma2.saturate, instr.hfma2.type_b, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39, - GetRegister(instr.gpr39)}; + GetConstBuffer(instr.cbuf34.index, 
instr.cbuf34.GetOffset()), + instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; case OpCode::Id::HFMA2_RC: neg_b = instr.hfma2.negate_b; neg_c = instr.hfma2.negate_c; return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + instr.hfma2.type_b, + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::HFMA2_RR: neg_b = instr.hfma2.rr.negate_b; neg_c = instr.hfma2.rr.negate_c; diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 85e67b03b..16eb3985f 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index c8b105a08..daf97174b 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7d4d2b9ef..1f418b4e6 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -80,7 +80,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { Node index = 
GetRegister(instr.gpr8); const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index); + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); switch (instr.ld_c.type.Value()) { case Tegra::Shader::UniformType::Single: @@ -89,7 +89,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { case Tegra::Shader::UniformType::Double: { const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); SetTemporal(bb, 0, op_a); SetTemporal(bb, 1, op_b); @@ -142,7 +142,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { ASSERT(cbuf != nullptr); const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4; + const auto cbuf_offset = cbuf_offset_imm->GetValue(); bb.push_back(Comment( fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 85026bb37..6623f8ff9 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 0cd9cd1cc..9cb864500 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -32,13 +32,14 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, 
Node> { switch (opcode->get().GetId()) { case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {instr.xmad.merge_56, + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RR: return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RC: return {false, GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::XMAD_IMM: return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index c4ecb2e3c..6e42e3dfb 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -249,7 +249,7 @@ public: } u32 GetSize() const { - return max_offset + 1; + return max_offset + sizeof(float); } private: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index edd3816ba..b783e4b27 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -109,8 +109,7 @@ enum class SurfaceType { ColorTexture = 0, Depth = 1, DepthStencil = 2, - Fill = 3, - Invalid = 4, + Invalid = 3, }; enum class SurfaceTarget { diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 0c0864742..f50225d5f 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -13,7 +13,6 @@ #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" -#include "core/hle/kernel/timer.h" #include "core/hle/kernel/wait_object.h" #include "core/memory.h" @@ -155,8 +154,6 @@ std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitO switch (object.GetHandleType()) { case Kernel::HandleType::ReadableEvent: return 
std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); - case Kernel::HandleType::Timer: - return std::make_unique<WaitTreeTimer>(static_cast<const Kernel::Timer&>(object)); case Kernel::HandleType::Thread: return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object)); default: @@ -348,23 +345,6 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeEvent::GetChildren() const { return list; } -WaitTreeTimer::WaitTreeTimer(const Kernel::Timer& object) : WaitTreeWaitObject(object) {} -WaitTreeTimer::~WaitTreeTimer() = default; - -std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeTimer::GetChildren() const { - std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren()); - - const auto& timer = static_cast<const Kernel::Timer&>(object); - - list.push_back(std::make_unique<WaitTreeText>( - tr("reset type = %1").arg(GetResetTypeQString(timer.GetResetType())))); - list.push_back( - std::make_unique<WaitTreeText>(tr("initial delay = %1").arg(timer.GetInitialDelay()))); - list.push_back( - std::make_unique<WaitTreeText>(tr("interval delay = %1").arg(timer.GetIntervalDelay()))); - return list; -} - WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list) : thread_list(list) {} WaitTreeThreadList::~WaitTreeThreadList() = default; diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index e639ef412..365c3dbfe 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h @@ -20,7 +20,6 @@ namespace Kernel { class ReadableEvent; class WaitObject; class Thread; -class Timer; } // namespace Kernel class WaitTreeThread; @@ -150,15 +149,6 @@ public: std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; }; -class WaitTreeTimer : public WaitTreeWaitObject { - Q_OBJECT -public: - explicit WaitTreeTimer(const Kernel::Timer& object); - ~WaitTreeTimer() override; - - std::vector<std::unique_ptr<WaitTreeItem>> 
GetChildren() const override; -}; - class WaitTreeThreadList : public WaitTreeExpandableItem { Q_OBJECT public: |