From 139ea93512aeead8a4aee3910a3de86eb109a838 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 5 Nov 2021 15:52:31 +0100 Subject: VideoCore: implement channels on gpu caches. --- src/video_core/gpu.cpp | 468 +++++++++---------------------------------------- 1 file changed, 81 insertions(+), 387 deletions(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 33431f2a0..80a1c69e0 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -18,6 +18,8 @@ #include "core/hle/service/nvdrv/nvdata.h" #include "core/perf_stats.h" #include "video_core/cdma_pusher.h" +#include "video_core/control/channel_state.h" +#include "video_core/control/scheduler.h" #include "video_core/dma_pusher.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) - : gpu{gpu_}, system{system_}, memory_manager{std::make_unique( - system)}, - dma_pusher{std::make_unique(system, gpu)}, use_nvdec{use_nvdec_}, - maxwell_3d{std::make_unique(system, *memory_manager)}, - fermi_2d{std::make_unique()}, - kepler_compute{std::make_unique(system, *memory_manager)}, - maxwell_dma{std::make_unique(system, *memory_manager)}, - kepler_memory{std::make_unique(system, *memory_manager)}, + : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, shader_notify{std::make_unique()}, is_async{is_async_}, - gpu_thread{system_, is_async_} {} + gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} ~Impl() = default; - /// Binds a renderer to the GPU. - void BindRenderer(std::unique_ptr renderer_) { - renderer = std::move(renderer_); - rasterizer = renderer->ReadRasterizer(); - - memory_manager->BindRasterizer(rasterizer); - maxwell_3d->BindRasterizer(rasterizer); - fermi_2d->BindRasterizer(rasterizer); - kepler_compute->BindRasterizer(rasterizer); - kepler_memory->BindRasterizer(rasterizer); - maxwell_dma->BindRasterizer(rasterizer); + std::shared_ptr CreateChannel(s32 channel_id) { + auto channel_state = std::make_shared(channel_id); + channels.emplace(channel_id, channel_state); + scheduler->DeclareChannel(channel_state); + return channel_state; } - /// Calls a GPU method. - void CallMethod(const GPU::MethodCall& method_call) { - LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, - method_call.subchannel); + void BindChannel(s32 channel_id) { + if (bound_channel == channel_id) { + return; + } + auto it = channels.find(channel_id); + ASSERT(it != channels.end()); + bound_channel = channel_id; + current_channel = it->second.get(); - ASSERT(method_call.subchannel < bound_engines.size()); + rasterizer->BindChannel(*current_channel); + } - if (ExecuteMethodOnEngine(method_call.method)) { - CallEngineMethod(method_call); - } else { - CallPullerMethod(method_call); - } + std::shared_ptr AllocateChannel() { + return CreateChannel(new_channel_id++); } - /// Calls a GPU multivalue method. - void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending) { - LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); + void InitChannel(Control::ChannelState& to_init) { + to_init.Init(system, gpu); + to_init.BindRasterizer(rasterizer); + rasterizer->InitializeChannel(to_init); + } - ASSERT(subchannel < bound_engines.size()); + void ReleaseChannel(Control::ChannelState& to_release) { + UNIMPLEMENTED(); + } - if (ExecuteMethodOnEngine(method)) { - CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); - } else { - for (std::size_t i = 0; i < amount; i++) { - CallPullerMethod(GPU::MethodCall{ - method, - base_start[i], - subchannel, - methods_pending - static_cast(i), - }); - } + void CreateHost1xChannel() { + if (host1x_channel) { + return; } + host1x_channel = CreateChannel(0); + host1x_channel->memory_manager = std::make_shared(system); + InitChannel(*host1x_channel); + } + + /// Binds a renderer to the GPU. + void BindRenderer(std::unique_ptr renderer_) { + renderer = std::move(renderer_); + rasterizer = renderer->ReadRasterizer(); } /// Flush all current written commands into the host GPU for execution. @@ -146,42 +141,44 @@ struct GPU::Impl { /// Returns a reference to the Maxwell3D GPU engine. [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { - return *maxwell_3d; + ASSERT(current_channel); + return *current_channel->maxwell_3d; } /// Returns a const reference to the Maxwell3D GPU engine. [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { - return *maxwell_3d; + ASSERT(current_channel); + return *current_channel->maxwell_3d; } /// Returns a reference to the KeplerCompute GPU engine. [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { - return *kepler_compute; + ASSERT(current_channel); + return *current_channel->kepler_compute; } /// Returns a reference to the KeplerCompute GPU engine. [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { - return *kepler_compute; + ASSERT(current_channel); + return *current_channel->kepler_compute; } /// Returns a reference to the GPU memory manager. [[nodiscard]] Tegra::MemoryManager& MemoryManager() { - return *memory_manager; - } - - /// Returns a const reference to the GPU memory manager. - [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const { - return *memory_manager; + CreateHost1xChannel(); + return *host1x_channel->memory_manager; } /// Returns a reference to the GPU DMA pusher. [[nodiscard]] Tegra::DmaPusher& DmaPusher() { - return *dma_pusher; + ASSERT(current_channel); + return *current_channel->dma_pusher; } /// Returns a const reference to the GPU DMA pusher. [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { - return *dma_pusher; + ASSERT(current_channel); + return *current_channel->dma_pusher; } /// Returns a reference to the underlying renderer. @@ -306,7 +303,7 @@ struct GPU::Impl { /// This can be used to launch any necessary threads and register any necessary /// core timing events. void Start() { - gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); + gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); cpu_context = renderer->GetRenderWindow().CreateSharedContext(); cpu_context->MakeCurrent(); } @@ -328,8 +325,8 @@ struct GPU::Impl { } /// Push GPU command entries to be processed - void PushGPUEntries(Tegra::CommandList&& entries) { - gpu_thread.SubmitList(std::move(entries)); + void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) { + gpu_thread.SubmitList(channel, std::move(entries)); } /// Push GPU command buffer entries to be processed @@ -381,303 +378,16 @@ struct GPU::Impl { interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } - void ProcessBindMethod(const GPU::MethodCall& method_call) { - // Bind the current subchannel to the desired engine id. - LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, - method_call.argument); - const auto engine_id = static_cast(method_call.argument); - bound_engines[method_call.subchannel] = static_cast(engine_id); - switch (engine_id) { - case EngineID::FERMI_TWOD_A: - dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); - break; - case EngineID::MAXWELL_B: - dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); - break; - case EngineID::KEPLER_COMPUTE_B: - dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); - break; - case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); - break; - case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); - } - } - - void ProcessFenceActionMethod() { - switch (regs.fence_action.op) { - case GPU::FenceOperation::Acquire: - WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); - break; - case GPU::FenceOperation::Increment: - IncrementSyncPoint(regs.fence_action.syncpoint_id); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); - } - } - - void ProcessWaitForInterruptMethod() { - // TODO(bunnei) ImplementMe - LOG_WARNING(HW_GPU, "(STUBBED) called"); - } - - void ProcessSemaphoreTriggerMethod() { - const auto semaphoreOperationMask = 0xF; - const auto op = - static_cast(regs.semaphore_trigger & semaphoreOperationMask); - if (op == GpuSemaphoreOperation::WriteLong) { - struct Block { - u32 sequence; - u32 zeros = 0; - u64 timestamp; - }; - - Block block{}; - block.sequence = regs.semaphore_sequence; - // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of - // CoreTiming - block.timestamp = GetTicks(); - memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, - sizeof(block)); - } else { - const u32 word{memory_manager->Read(regs.semaphore_address.SemaphoreAddress())}; - if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || - (op == GpuSemaphoreOperation::AcquireGequal && - static_cast(word - regs.semaphore_sequence) > 0) || - (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { - // Nothing to do in this case - } else { - regs.acquire_source = true; - regs.acquire_value = regs.semaphore_sequence; - if (op == GpuSemaphoreOperation::AcquireEqual) { - regs.acquire_active = true; - regs.acquire_mode = false; - } else if (op == GpuSemaphoreOperation::AcquireGequal) { - regs.acquire_active = true; - regs.acquire_mode = true; - } else if (op == GpuSemaphoreOperation::AcquireMask) { - // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with - // semaphore_sequence, gives a non-0 result - LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); - } else { - LOG_ERROR(HW_GPU, "Invalid semaphore operation"); - } - } - } - } - - void ProcessSemaphoreRelease() { - memory_manager->Write(regs.semaphore_address.SemaphoreAddress(), - regs.semaphore_release); - } - - void ProcessSemaphoreAcquire() { - const u32 word = memory_manager->Read(regs.semaphore_address.SemaphoreAddress()); - const auto value = regs.semaphore_acquire; - if (word != value) { - regs.acquire_active = true; - regs.acquire_value = value; - // TODO(kemathe73) figure out how to do the acquire_timeout - regs.acquire_mode = false; - regs.acquire_source = false; - } - } - - /// Calls a GPU puller method. - void CallPullerMethod(const GPU::MethodCall& method_call) { - regs.reg_array[method_call.method] = method_call.argument; - const auto method = static_cast(method_call.method); - - switch (method) { - case BufferMethods::BindObject: { - ProcessBindMethod(method_call); - break; - } - case BufferMethods::Nop: - case BufferMethods::SemaphoreAddressHigh: - case BufferMethods::SemaphoreAddressLow: - case BufferMethods::SemaphoreSequence: - break; - case BufferMethods::UnkCacheFlush: - rasterizer->SyncGuestHost(); - break; - case BufferMethods::WrcacheFlush: - rasterizer->SignalReference(); - break; - case BufferMethods::FenceValue: - break; - case BufferMethods::RefCnt: - rasterizer->SignalReference(); - break; - case BufferMethods::FenceAction: - ProcessFenceActionMethod(); - break; - case BufferMethods::WaitForInterrupt: - rasterizer->WaitForIdle(); - break; - case BufferMethods::SemaphoreTrigger: { - ProcessSemaphoreTriggerMethod(); - break; - } - case BufferMethods::NotifyIntr: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); - break; - } - case BufferMethods::Unk28: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); - break; - } - case BufferMethods::SemaphoreAcquire: { - ProcessSemaphoreAcquire(); - break; - } - case BufferMethods::SemaphoreRelease: { - ProcessSemaphoreRelease(); - break; - } - case BufferMethods::Yield: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); - break; - } - default: - LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); - break; - } - } - - /// Calls a GPU engine method. - void CallEngineMethod(const GPU::MethodCall& method_call) { - const EngineID engine = bound_engines[method_call.subchannel]; - - switch (engine) { - case EngineID::FERMI_TWOD_A: - fermi_2d->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); - break; - case EngineID::MAXWELL_B: - maxwell_3d->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); - break; - case EngineID::KEPLER_COMPUTE_B: - kepler_compute->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); - break; - case EngineID::MAXWELL_DMA_COPY_A: - maxwell_dma->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); - break; - case EngineID::KEPLER_INLINE_TO_MEMORY_B: - kepler_memory->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented engine"); - } - } - - /// Calls a GPU engine multivalue method. - void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending) { - const EngineID engine = bound_engines[subchannel]; - - switch (engine) { - case EngineID::FERMI_TWOD_A: - fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); - break; - case EngineID::MAXWELL_B: - maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); - break; - case EngineID::KEPLER_COMPUTE_B: - kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); - break; - case EngineID::MAXWELL_DMA_COPY_A: - maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); - break; - case EngineID::KEPLER_INLINE_TO_MEMORY_B: - kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented engine"); - } - } - - /// Determines where the method should be executed. - [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) { - const auto buffer_method = static_cast(method); - return buffer_method >= BufferMethods::NonPullerMethods; - } - - struct Regs { - static constexpr size_t NUM_REGS = 0x40; - - union { - struct { - INSERT_PADDING_WORDS_NOINIT(0x4); - struct { - u32 address_high; - u32 address_low; - - [[nodiscard]] GPUVAddr SemaphoreAddress() const { - return static_cast((static_cast(address_high) << 32) | - address_low); - } - } semaphore_address; - - u32 semaphore_sequence; - u32 semaphore_trigger; - INSERT_PADDING_WORDS_NOINIT(0xC); - - // The pusher and the puller share the reference counter, the pusher only has read - // access - u32 reference_count; - INSERT_PADDING_WORDS_NOINIT(0x5); - - u32 semaphore_acquire; - u32 semaphore_release; - u32 fence_value; - GPU::FenceAction fence_action; - INSERT_PADDING_WORDS_NOINIT(0xE2); - - // Puller state - u32 acquire_mode; - u32 acquire_source; - u32 acquire_active; - u32 acquire_timeout; - u32 acquire_value; - }; - std::array reg_array; - }; - } regs{}; - GPU& gpu; Core::System& system; - std::unique_ptr memory_manager; - std::unique_ptr dma_pusher; + std::map> cdma_pushers; std::unique_ptr renderer; VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; - /// Mapping of command subchannels to their bound engine ids - std::array bound_engines{}; - /// 3D engine - std::unique_ptr maxwell_3d; - /// 2D engine - std::unique_ptr fermi_2d; - /// Compute engine - std::unique_ptr kepler_compute; - /// DMA engine - std::unique_ptr maxwell_dma; - /// Inline memory engine - std::unique_ptr kepler_memory; + std::shared_ptr host1x_channel; + s32 new_channel_id{1}; /// Shader build notifier std::unique_ptr shader_notify; /// When true, we are about to shut down emulation session, so terminate outstanding tasks @@ -710,33 +420,10 @@ struct GPU::Impl { VideoCommon::GPUThread::ThreadManager gpu_thread; std::unique_ptr cpu_context; -#define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ - "Field " #field_name " has invalid position") - - ASSERT_REG_POSITION(semaphore_address, 0x4); - ASSERT_REG_POSITION(semaphore_sequence, 0x6); - ASSERT_REG_POSITION(semaphore_trigger, 0x7); - ASSERT_REG_POSITION(reference_count, 0x14); - ASSERT_REG_POSITION(semaphore_acquire, 0x1A); - ASSERT_REG_POSITION(semaphore_release, 0x1B); - ASSERT_REG_POSITION(fence_value, 0x1C); - ASSERT_REG_POSITION(fence_action, 0x1D); - - ASSERT_REG_POSITION(acquire_mode, 0x100); - ASSERT_REG_POSITION(acquire_source, 0x101); - ASSERT_REG_POSITION(acquire_active, 0x102); - ASSERT_REG_POSITION(acquire_timeout, 0x103); - ASSERT_REG_POSITION(acquire_value, 0x104); - -#undef ASSERT_REG_POSITION - - enum class GpuSemaphoreOperation { - AcquireEqual = 0x1, - WriteLong = 0x2, - AcquireGequal = 0x4, - AcquireMask = 0x8, - }; + std::unique_ptr scheduler; + std::unordered_map> channels; + Tegra::Control::ChannelState* current_channel; + s32 bound_channel{-1}; }; GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) @@ -744,17 +431,24 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) GPU::~GPU() = default; -void GPU::BindRenderer(std::unique_ptr renderer) { - impl->BindRenderer(std::move(renderer)); +std::shared_ptr GPU::AllocateChannel() { + return impl->AllocateChannel(); +} + +void GPU::InitChannel(Control::ChannelState& to_init) { + impl->InitChannel(to_init); } -void GPU::CallMethod(const MethodCall& method_call) { - impl->CallMethod(method_call); +void GPU::BindChannel(s32 channel_id) { + impl->BindChannel(channel_id); } -void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending) { - impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending); +void GPU::ReleaseChannel(Control::ChannelState& to_release) { + impl->ReleaseChannel(to_release); +} + +void GPU::BindRenderer(std::unique_ptr renderer) { + impl->BindRenderer(std::move(renderer)); } void GPU::FlushCommands() { @@ -881,8 +575,8 @@ void GPU::ReleaseContext() { impl->ReleaseContext(); } -void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - impl->PushGPUEntries(std::move(entries)); +void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) { + impl->PushGPUEntries(channel, std::move(entries)); } void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { -- cgit v1.2.3 From 2c62563ab58a70236a39571149f8370f3fdfb2a3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 7 Nov 2021 14:17:32 +0100 Subject: NVHOST_CTRl: Implement missing method and fix some stuffs. --- src/video_core/gpu.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 80a1c69e0..8c0ff0094 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -249,6 +249,11 @@ struct GPU::Impl { void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { std::scoped_lock lock{sync_mutex}; + u32 current_value = syncpoints.at(syncpoint_id).load(); + if ((static_cast(current_value) - static_cast(value)) >= 0) { + TriggerCpuInterrupt(syncpoint_id, value); + return; + } auto& interrupt = syncpt_interrupts.at(syncpoint_id); bool contains = std::any_of(interrupt.begin(), interrupt.end(), [value](u32 in_value) { return in_value == value; }); -- cgit v1.2.3 From f350c3d74ea7880fc6d21f7f638b0d4a70a3246b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jan 2022 22:03:37 +0100 Subject: Texture cache: Fix the remaining issues with memory mnagement and unmapping. --- src/video_core/gpu.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8c0ff0094..eebd7f3ff 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -73,6 +73,10 @@ struct GPU::Impl { rasterizer->InitializeChannel(to_init); } + void InitAddressSpace(Tegra::MemoryManager& memory_manager) { + memory_manager.BindRasterizer(rasterizer); + } + void ReleaseChannel(Control::ChannelState& to_release) { UNIMPLEMENTED(); } @@ -452,6 +456,10 @@ void GPU::ReleaseChannel(Control::ChannelState& to_release) { impl->ReleaseChannel(to_release); } +void GPU::InitAddressSpace(Tegra::MemoryManager& memory_manager) { + impl->InitAddressSpace(memory_manager); +} + void GPU::BindRenderer(std::unique_ptr renderer) { impl->BindRenderer(std::move(renderer)); } -- cgit v1.2.3 From 668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 30 Jan 2022 10:31:13 +0100 Subject: VideoCore: Refactor syncing. --- src/video_core/gpu.cpp | 197 +++++++++++++++++++++++++------------------------ 1 file changed, 102 insertions(+), 95 deletions(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index eebd7f3ff..1097db08a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -28,6 +28,8 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" +#include "video_core/host1x/host1x.h" +#include "video_core/host1x/syncpoint_manager.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" #include "video_core/shader_notify.h" @@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) - : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, + : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, shader_notify{std::make_unique()}, is_async{is_async_}, gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} @@ -115,31 +117,35 @@ struct GPU::Impl { } /// Request a host GPU memory flush from the CPU. - [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) { - std::unique_lock lck{flush_request_mutex}; - const u64 fence = ++last_flush_fence; - flush_requests.emplace_back(fence, addr, size); + template + [[nodiscard]] u64 RequestSyncOperation(Func&& action) { + std::unique_lock lck{sync_request_mutex}; + const u64 fence = ++last_sync_fence; + sync_requests.emplace_back(action); return fence; } /// Obtains current flush request fence id. - [[nodiscard]] u64 CurrentFlushRequestFence() const { - return current_flush_fence.load(std::memory_order_relaxed); + [[nodiscard]] u64 CurrentSyncRequestFence() const { + return current_sync_fence.load(std::memory_order_relaxed); + } + + void WaitForSyncOperation(const u64 fence) { + std::unique_lock lck{sync_request_mutex}; + sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; }); } /// Tick pending requests within the GPU. void TickWork() { - std::unique_lock lck{flush_request_mutex}; - while (!flush_requests.empty()) { - auto& request = flush_requests.front(); - const u64 fence = request.fence; - const VAddr addr = request.addr; - const std::size_t size = request.size; - flush_requests.pop_front(); - flush_request_mutex.unlock(); - rasterizer->FlushRegion(addr, size); - current_flush_fence.store(fence); - flush_request_mutex.lock(); + std::unique_lock lck{sync_request_mutex}; + while (!sync_requests.empty()) { + auto request = std::move(sync_requests.front()); + sync_requests.pop_front(); + sync_request_mutex.unlock(); + request(); + current_sync_fence.fetch_add(1, std::memory_order_release); + sync_request_mutex.lock(); + sync_request_cv.notify_all(); } } @@ -207,78 +213,26 @@ struct GPU::Impl { /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value) { - // Synced GPU, is always in sync - if (!is_async) { - return; - } if (syncpoint_id == UINT32_MAX) { - // TODO: Research what this does. - LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); return; } MICROPROFILE_SCOPE(GPU_wait); - std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=, this] { - if (shutting_down.load(std::memory_order_relaxed)) { - // We're shutting down, ensure no threads continue to wait for the next syncpoint - return true; - } - return syncpoints.at(syncpoint_id).load() >= value; - }); + host1x.GetSyncpointManager().WaitHost(syncpoint_id, value); } void IncrementSyncPoint(u32 syncpoint_id) { - auto& syncpoint = syncpoints.at(syncpoint_id); - syncpoint++; - std::scoped_lock lock{sync_mutex}; - sync_cv.notify_all(); - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - if (!interrupt.empty()) { - u32 value = syncpoint.load(); - auto it = interrupt.begin(); - while (it != interrupt.end()) { - if (value >= *it) { - TriggerCpuInterrupt(syncpoint_id, *it); - it = interrupt.erase(it); - continue; - } - it++; - } - } + host1x.GetSyncpointManager().IncrementHost(syncpoint_id); } [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { - return syncpoints.at(syncpoint_id).load(); + return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); } void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { - std::scoped_lock lock{sync_mutex}; - u32 current_value = syncpoints.at(syncpoint_id).load(); - if ((static_cast(current_value) - static_cast(value)) >= 0) { + auto& syncpoint_manager = host1x.GetSyncpointManager(); + syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() { TriggerCpuInterrupt(syncpoint_id, value); - return; - } - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - bool contains = std::any_of(interrupt.begin(), interrupt.end(), - [value](u32 in_value) { return in_value == value; }); - if (contains) { - return; - } - interrupt.emplace_back(value); - } - - [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { - std::scoped_lock lock{sync_mutex}; - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - const auto iter = - std::find_if(interrupt.begin(), interrupt.end(), - [value](u32 interrupt_value) { return value == interrupt_value; }); - - if (iter == interrupt.end()) { - return false; - } - interrupt.erase(iter); - return true; + }); } [[nodiscard]] u64 GetTicks() const { @@ -387,8 +341,48 @@ struct GPU::Impl { interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } + void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences) { + size_t current_request_counter{}; + { + std::unique_lock lk(request_swap_mutex); + if (free_swap_counters.empty()) { + current_request_counter = request_swap_counters.size(); + request_swap_counters.emplace_back(num_fences); + } else { + current_request_counter = free_swap_counters.front(); + request_swap_counters[current_request_counter] = num_fences; + free_swap_counters.pop_front(); + } + } + const auto wait_fence = + RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { + auto& syncpoint_manager = host1x.GetSyncpointManager(); + if (num_fences == 0) { + renderer->SwapBuffers(framebuffer); + } + const auto executer = [this, current_request_counter, + framebuffer_copy = *framebuffer]() { + { + std::unique_lock lk(request_swap_mutex); + if (--request_swap_counters[current_request_counter] != 0) { + return; + } + free_swap_counters.push_back(current_request_counter); + } + renderer->SwapBuffers(&framebuffer_copy); + }; + for (size_t i = 0; i < num_fences; i++) { + syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); + } + }); + gpu_thread.TickGPU(); + WaitForSyncOperation(wait_fence); + } + GPU& gpu; Core::System& system; + Host1x::Host1x& host1x; std::map> cdma_pushers; std::unique_ptr renderer; @@ -411,18 +405,11 @@ struct GPU::Impl { std::condition_variable sync_cv; - struct FlushRequest { - explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) - : fence{fence_}, addr{addr_}, size{size_} {} - u64 fence; - VAddr addr; - std::size_t size; - }; - - std::list flush_requests; - std::atomic current_flush_fence{}; - u64 last_flush_fence{}; - std::mutex flush_request_mutex; + std::list> sync_requests; + std::atomic current_sync_fence{}; + u64 last_sync_fence{}; + std::mutex sync_request_mutex; + std::condition_variable sync_request_cv; const bool is_async; @@ -433,6 +420,10 @@ struct GPU::Impl { std::unordered_map> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; + + std::deque free_swap_counters; + std::deque request_swap_counters; + std::mutex request_swap_mutex; }; GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) @@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() { } u64 GPU::RequestFlush(VAddr addr, std::size_t size) { - return impl->RequestFlush(addr, size); + return impl->RequestSyncOperation( + [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); +} + +u64 GPU::CurrentSyncRequestFence() const { + return impl->CurrentSyncRequestFence(); } -u64 GPU::CurrentFlushRequestFence() const { - return impl->CurrentFlushRequestFence(); +void GPU::WaitForSyncOperation(u64 fence) { + return impl->WaitForSyncOperation(fence); } void GPU::TickWork() { impl->TickWork(); } +/// Gets a mutable reference to the Host1x interface +Host1x::Host1x& GPU::Host1x() { + return impl->host1x; +} + +/// Gets an immutable reference to the Host1x interface. +const Host1x::Host1x& GPU::Host1x() const { + return impl->host1x; +} + Engines::Maxwell3D& GPU::Maxwell3D() { return impl->Maxwell3D(); } @@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { return impl->ShaderNotify(); } +void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences) { + impl->RequestSwapBuffers(framebuffer, fences, num_fences); +} + void GPU::WaitFence(u32 syncpoint_id, u32 value) { impl->WaitFence(syncpoint_id, value); } @@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { impl->RegisterSyncptInterrupt(syncpoint_id, value); } -bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { - return impl->CancelSyncptInterrupt(syncpoint_id, value); -} - u64 GPU::GetTicks() const { return impl->GetTicks(); } -- cgit v1.2.3 From 2931101e6f5aa755566ef40f6e6dc71909fd3e92 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 30 Jan 2022 22:26:01 +0100 Subject: NVDRV: Refactor Host1x --- src/video_core/gpu.cpp | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1097db08a..e05c9a357 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -83,19 +83,11 @@ struct GPU::Impl { UNIMPLEMENTED(); } - void CreateHost1xChannel() { - if (host1x_channel) { - return; - } - host1x_channel = CreateChannel(0); - host1x_channel->memory_manager = std::make_shared(system); - InitChannel(*host1x_channel); - } - /// Binds a renderer to the GPU. void BindRenderer(std::unique_ptr renderer_) { renderer = std::move(renderer_); rasterizer = renderer->ReadRasterizer(); + host1x.MemoryManager().BindRasterizer(rasterizer); } /// Flush all current written commands into the host GPU for execution. @@ -173,12 +165,6 @@ struct GPU::Impl { return *current_channel->kepler_compute; } - /// Returns a reference to the GPU memory manager. - [[nodiscard]] Tegra::MemoryManager& MemoryManager() { - CreateHost1xChannel(); - return *host1x_channel->memory_manager; - } - /// Returns a reference to the GPU DMA pusher. [[nodiscard]] Tegra::DmaPusher& DmaPusher() { ASSERT(current_channel); @@ -299,7 +285,7 @@ struct GPU::Impl { } if (!cdma_pushers.contains(id)) { - cdma_pushers.insert_or_assign(id, std::make_unique(gpu)); + cdma_pushers.insert_or_assign(id, std::make_unique(host1x)); } // SubmitCommandBuffer would make the nvdec operations async, this is not currently working @@ -389,7 +375,6 @@ struct GPU::Impl { VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; - std::shared_ptr host1x_channel; s32 new_channel_id{1}; /// Shader build notifier std::unique_ptr shader_notify; @@ -510,14 +495,6 @@ const Engines::KeplerCompute& GPU::KeplerCompute() const { return impl->KeplerCompute(); } -Tegra::MemoryManager& GPU::MemoryManager() { - return impl->MemoryManager(); -} - -const Tegra::MemoryManager& GPU::MemoryManager() const { - return impl->MemoryManager(); -} - Tegra::DmaPusher& GPU::DmaPusher() { return impl->DmaPusher(); } -- cgit v1.2.3 From 920429fde745b3bf6d33b6ca991628f64988f754 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 30 Jan 2022 23:13:46 +0100 Subject: NVDRV: Further refactors and eliminate old code. --- src/video_core/gpu.cpp | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e05c9a357..a1d19b1c8 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -14,7 +14,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" -#include "core/hardware_interrupt_manager.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/perf_stats.h" #include "video_core/cdma_pusher.h" @@ -36,8 +35,6 @@ namespace Tegra { -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); - struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, @@ -197,30 +194,6 @@ struct GPU::Impl { return *shader_notify; } - /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. - void WaitFence(u32 syncpoint_id, u32 value) { - if (syncpoint_id == UINT32_MAX) { - return; - } - MICROPROFILE_SCOPE(GPU_wait); - host1x.GetSyncpointManager().WaitHost(syncpoint_id, value); - } - - void IncrementSyncPoint(u32 syncpoint_id) { - host1x.GetSyncpointManager().IncrementHost(syncpoint_id); - } - - [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { - return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); - } - - void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { - auto& syncpoint_manager = host1x.GetSyncpointManager(); - syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() { - TriggerCpuInterrupt(syncpoint_id, value); - }); - } - [[nodiscard]] u64 GetTicks() const { // This values were reversed engineered by fincs from NVN // The gpu clock is reported in units of 385/625 nanoseconds @@ -322,11 +295,6 @@ struct GPU::Impl { gpu_thread.FlushAndInvalidateRegion(addr, size); } - void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const { - auto& interrupt_manager = system.InterruptManager(); - interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); - } - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, Service::Nvidia::NvFence* fences, size_t num_fences) { size_t current_request_counter{}; @@ -524,22 +492,6 @@ void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, impl->RequestSwapBuffers(framebuffer, fences, num_fences); } -void GPU::WaitFence(u32 syncpoint_id, u32 value) { - impl->WaitFence(syncpoint_id, value); -} - -void GPU::IncrementSyncPoint(u32 syncpoint_id) { - impl->IncrementSyncPoint(syncpoint_id); -} - -u32 GPU::GetSyncpointValue(u32 syncpoint_id) const { - return impl->GetSyncpointValue(syncpoint_id); -} - -void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { - impl->RegisterSyncptInterrupt(syncpoint_id, value); -} - u64 GPU::GetTicks() const { return impl->GetTicks(); } -- cgit v1.2.3 From bc8b3d225eda388f0603830cbff8357893abb0f9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Feb 2022 01:16:11 +0100 Subject: VideoCore: Refactor fencing system. --- src/video_core/gpu.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index a1d19b1c8..d7a3dd96b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -93,16 +93,13 @@ struct GPU::Impl { } /// Synchronizes CPU writes with Host GPU memory. - void SyncGuestHost() { - rasterizer->SyncGuestHost(); + void InvalidateGPUCache() { + rasterizer->InvalidateGPUCache(); } /// Signal the ending of command list. void OnCommandListEnd() { - if (is_async) { - // This command only applies to asynchronous GPU mode - gpu_thread.OnCommandListEnd(); - } + gpu_thread.OnCommandListEnd(); } /// Request a host GPU memory flush from the CPU. @@ -296,7 +293,7 @@ struct GPU::Impl { } void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - Service::Nvidia::NvFence* fences, size_t num_fences) { + std::array& fences, size_t num_fences) { size_t current_request_counter{}; { std::unique_lock lk(request_swap_mutex); @@ -412,8 +409,8 @@ void GPU::FlushCommands() { impl->FlushCommands(); } -void GPU::SyncGuestHost() { - impl->SyncGuestHost(); +void GPU::InvalidateGPUCache() { + impl->InvalidateGPUCache(); } void GPU::OnCommandListEnd() { @@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { } void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - Service::Nvidia::NvFence* fences, size_t num_fences) { + std::array& fences, size_t num_fences) { impl->RequestSwapBuffers(framebuffer, fences, num_fences); } -- cgit v1.2.3 From ca3db0d7c94a20668781830ff852dbf512598efb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 1 Sep 2022 05:45:22 +0200 Subject: General: address feedback --- src/video_core/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d7a3dd96b..28b38273e 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -355,7 +355,7 @@ struct GPU::Impl { std::condition_variable sync_cv; - std::list> sync_requests; + std::list> sync_requests; std::atomic current_sync_fence{}; u64 last_sync_fence{}; std::mutex sync_request_mutex; -- cgit v1.2.3