diff options
32 files changed, 176 insertions, 126 deletions
| diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 906c486fd..5b51fcafa 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -95,11 +95,11 @@ add_custom_command(OUTPUT scm_rev.cpp  )  add_library(common STATIC +    algorithm.h      alignment.h      assert.h      detached_tasks.cpp      detached_tasks.h -    binary_find.h      bit_field.h      bit_util.h      cityhash.cpp diff --git a/src/common/binary_find.h b/src/common/algorithm.h index 5cc523bf9..e21b1373c 100644 --- a/src/common/binary_find.h +++ b/src/common/algorithm.h @@ -5,6 +5,12 @@  #pragma once  #include <algorithm> +#include <functional> + +// Algorithms that operate on iterators, much like the <algorithm> header. +// +// Note: If the algorithm is not general-purpose and/or doesn't operate on iterators, +//       it should probably not be placed within this header.  namespace Common { diff --git a/src/core/core.cpp b/src/core/core.cpp index 4d0ac72a5..ddc767e30 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -256,6 +256,8 @@ struct System::Impl {          is_powered_on = false;          exit_lock = false; +        gpu_core->WaitIdle(); +          // Shutdown emulation session          renderer.reset();          GDBStub::Shutdown(); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 941ebc93a..3a32d5b41 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1140,8 +1140,9 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {      LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind));      if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { -        const auto backend = BCAT::CreateBackendFromSettings( -            [this](u64 tid) { return system.GetFileSystemController().GetBCATDirectory(tid); }); +        const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) { +            return 
system.GetFileSystemController().GetBCATDirectory(tid); +        });          const auto build_id_full = system.GetCurrentProcessBuildID();          u64 build_id{};          std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 9d6946bc5..b86fda29a 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -10,8 +10,8 @@  namespace Service::BCAT { -ProgressServiceBackend::ProgressServiceBackend(std::string_view event_name) { -    auto& kernel{Core::System::GetInstance().Kernel()}; +ProgressServiceBackend::ProgressServiceBackend(Kernel::KernelCore& kernel, +                                               std::string_view event_name) {      event = Kernel::WritableEvent::CreateEventPair(          kernel, Kernel::ResetType::Automatic,          std::string("ProgressServiceBackend:UpdateEvent:").append(event_name)); diff --git a/src/core/hle/service/bcat/backend/backend.h b/src/core/hle/service/bcat/backend/backend.h index 51dbd3316..ea4b16ad0 100644 --- a/src/core/hle/service/bcat/backend/backend.h +++ b/src/core/hle/service/bcat/backend/backend.h @@ -15,6 +15,14 @@  #include "core/hle/kernel/writable_event.h"  #include "core/hle/result.h" +namespace Core { +class System; +} + +namespace Kernel { +class KernelCore; +} +  namespace Service::BCAT {  struct DeliveryCacheProgressImpl; @@ -88,7 +96,7 @@ public:      void FinishDownload(ResultCode result);  private: -    explicit ProgressServiceBackend(std::string_view event_name); +    explicit ProgressServiceBackend(Kernel::KernelCore& kernel, std::string_view event_name);      Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const;      DeliveryCacheProgressImpl& GetImpl(); @@ -145,6 +153,6 @@ public:      std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override;  }; -std::unique_ptr<Backend> 
CreateBackendFromSettings(DirectoryGetter getter); +std::unique_ptr<Backend> CreateBackendFromSettings(Core::System& system, DirectoryGetter getter);  } // namespace Service::BCAT diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 64022982b..918159e11 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -104,14 +104,15 @@ std::string GetZIPFilePath(u64 title_id) {  // If the error is something the user should know about (build ID mismatch, bad client version),  // display an error. -void HandleDownloadDisplayResult(DownloadResult res) { +void HandleDownloadDisplayResult(const AM::Applets::AppletManager& applet_manager, +                                 DownloadResult res) {      if (res == DownloadResult::Success || res == DownloadResult::NoResponse ||          res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError ||          res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) {          return;      } -    const auto& frontend{Core::System::GetInstance().GetAppletManager().GetAppletFrontendSet()}; +    const auto& frontend{applet_manager.GetAppletFrontendSet()};      frontend.error->ShowCustomErrorText(          ResultCode(-1), "There was an error while attempting to use Boxcat.",          DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {}); @@ -264,12 +265,13 @@ private:      u64 build_id;  }; -Boxcat::Boxcat(DirectoryGetter getter) : Backend(std::move(getter)) {} +Boxcat::Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter) +    : Backend(std::move(getter)), applet_manager{applet_manager_} {}  Boxcat::~Boxcat() = default; -void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, -                         ProgressServiceBackend& progress, +void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, DirectoryGetter 
dir_getter, +                         TitleIDVersion title, ProgressServiceBackend& progress,                           std::optional<std::string> dir_name = {}) {      progress.SetNeedHLELock(true); @@ -295,7 +297,7 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title,              FileUtil::Delete(zip_path);          } -        HandleDownloadDisplayResult(res); +        HandleDownloadDisplayResult(applet_manager, res);          progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE);          return;      } @@ -364,17 +366,24 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title,  bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) {      is_syncing.exchange(true); -    std::thread([this, title, &progress] { SynchronizeInternal(dir_getter, title, progress); }) + +    std::thread([this, title, &progress] { +        SynchronizeInternal(applet_manager, dir_getter, title, progress); +    })          .detach(); +      return true;  }  bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name,                                    ProgressServiceBackend& progress) {      is_syncing.exchange(true); -    std::thread( -        [this, title, name, &progress] { SynchronizeInternal(dir_getter, title, progress, name); }) + +    std::thread([this, title, name, &progress] { +        SynchronizeInternal(applet_manager, dir_getter, title, progress, name); +    })          .detach(); +      return true;  } @@ -420,7 +429,7 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title)                  FileUtil::Delete(path);              } -            HandleDownloadDisplayResult(res); +            HandleDownloadDisplayResult(applet_manager, res);              return std::nullopt;          }      } diff --git a/src/core/hle/service/bcat/backend/boxcat.h b/src/core/hle/service/bcat/backend/boxcat.h index 601151189..d65b42e58 100644 --- a/src/core/hle/service/bcat/backend/boxcat.h 
+++ b/src/core/hle/service/bcat/backend/boxcat.h @@ -9,6 +9,10 @@  #include <optional>  #include "core/hle/service/bcat/backend/backend.h" +namespace Service::AM::Applets { +class AppletManager; +} +  namespace Service::BCAT {  struct EventStatus { @@ -20,12 +24,13 @@ struct EventStatus {  /// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and  /// doesn't require a switch or nintendo account. The content is controlled by the yuzu team.  class Boxcat final : public Backend { -    friend void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, +    friend void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, +                                    DirectoryGetter dir_getter, TitleIDVersion title,                                      ProgressServiceBackend& progress,                                      std::optional<std::string> dir_name);  public: -    explicit Boxcat(DirectoryGetter getter); +    explicit Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter);      ~Boxcat() override;      bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override; @@ -53,6 +58,7 @@ private:      class Client;      std::unique_ptr<Client> client; +    AM::Applets::AppletManager& applet_manager;  };  } // namespace Service::BCAT diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp index 4e4aa758b..6d9d1527d 100644 --- a/src/core/hle/service/bcat/module.cpp +++ b/src/core/hle/service/bcat/module.cpp @@ -125,7 +125,11 @@ private:  class IBcatService final : public ServiceFramework<IBcatService> {  public:      explicit IBcatService(Core::System& system_, Backend& backend_) -        : ServiceFramework("IBcatService"), system{system_}, backend{backend_} { +        : ServiceFramework("IBcatService"), system{system_}, backend{backend_}, +          progress{{ +              ProgressServiceBackend{system_.Kernel(), "Normal"}, +          
    ProgressServiceBackend{system_.Kernel(), "Directory"}, +          }} {          // clang-format off          static const FunctionInfo functions[] = {              {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"}, @@ -249,10 +253,7 @@ private:      Core::System& system;      Backend& backend; -    std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress{ -        ProgressServiceBackend{"Normal"}, -        ProgressServiceBackend{"Directory"}, -    }; +    std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress;  };  void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) { @@ -557,12 +558,12 @@ void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId(      rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id));  } -std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { -    const auto backend = Settings::values.bcat_backend; - +std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system, +                                                   DirectoryGetter getter) {  #ifdef YUZU_ENABLE_BOXCAT -    if (backend == "boxcat") -        return std::make_unique<Boxcat>(std::move(getter)); +    if (Settings::values.bcat_backend == "boxcat") { +        return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter)); +    }  #endif      return std::make_unique<NullBackend>(std::move(getter)); @@ -571,7 +572,8 @@ std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) {  Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,                               FileSystem::FileSystemController& fsc_, const char* name)      : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)}, -      backend{CreateBackendFromSettings([&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, +      
backend{CreateBackendFromSettings(system_, +                                        [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })},        system{system_} {}  Module::Interface::~Interface() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bc..3f7b8e670 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -5,6 +5,7 @@  #include "common/assert.h"  #include "common/logging/log.h"  #include "core/core.h" +#include "core/core_timing.h"  #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"  #include "core/hle/service/nvdrv/devices/nvmap.h"  #include "core/perf_stats.h" @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3          transform, crop_rect};      system.GetPerfStats().EndGameFrame(); +    system.GetPerfStats().EndSystemFrame();      system.GPU().SwapBuffers(&framebuffer); +    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); +    system.GetPerfStats().BeginSystemFrame();  }  } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index eb88fee1b..b27ee0502 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&          return NvResult::BadParameter;      } +    u32 event_id = params.value & 0x00FF; + +    if (event_id >= MaxNvEvents) { +        std::memcpy(output.data(), &params, sizeof(params)); +        return NvResult::BadParameter; +    } + +    auto event = events_interface.events[event_id];      auto& gpu = system.GPU();      // This is mostly to take into account unimplemented features. As synced      // gpu is always synced.      
if (!gpu.IsAsync()) { +        event.writable->Signal();          return NvResult::Success;      }      auto lock = gpu.LockSync();      const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);      const s32 diff = current_syncpoint_value - params.threshold;      if (diff >= 0) { +        event.writable->Signal();          params.value = current_syncpoint_value;          std::memcpy(output.data(), &params, sizeof(params));          return NvResult::Success; @@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&          return NvResult::Timeout;      } -    u32 event_id; -    if (is_async) { -        event_id = params.value & 0x00FF; -        if (event_id >= MaxNvEvents) { -            std::memcpy(output.data(), &params, sizeof(params)); -            return NvResult::BadParameter; -        } -    } else { -        if (ctrl.fresh_call) { -            const auto result = events_interface.GetFreeEvent(); -            if (result) { -                event_id = *result; -            } else { -                LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); -                event_id = params.value & 0x00FF; -            } -        } else { -            event_id = ctrl.event_id; -        } -    } -      EventState status = events_interface.status[event_id];      if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {          events_interface.SetEventStatus(event_id, EventState::Waiting); @@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&              params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;          }          params.value |= event_id; -        events_interface.events[event_id].writable->Clear(); +        event.writable->Clear();          gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);          if (!is_async && ctrl.fresh_call) {              ctrl.must_delay = true; diff --git 
a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602..68d139cfb 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {      IPC::ResponseBuilder rb{ctx, 3, 1};      rb.Push(RESULT_SUCCESS);      if (event_id < MaxNvEvents) { -        rb.PushCopyObjects(nvdrv->GetEvent(event_id)); +        auto event = nvdrv->GetEvent(event_id); +        event->Clear(); +        rb.PushCopyObjects(event);          rb.Push<u32>(NvResult::Success);      } else {          rb.Push<u32>(0); diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e928..7bfb99e34 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -40,8 +40,8 @@ Module::Module(Core::System& system) {      auto& kernel = system.Kernel();      for (u32 i = 0; i < MaxNvEvents; i++) {          std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); -        events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( -            kernel, Kernel::ResetType::Automatic, event_label); +        events_interface.events[i] = +            Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label);          events_interface.status[i] = EventState::Free;          events_interface.registered[i] = false;      } diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index e1a07d3ee..55b68eb0c 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -14,8 +14,8 @@  namespace Service::NVFlinger { -BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) { -    auto& kernel = Core::System::GetInstance().Kernel(); +BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id) +    : id(id), 
layer_id(layer_id) {      buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,                                                                 "BufferQueue NativeHandle");  } diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 356bedb81..8f9b18547 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -15,6 +15,10 @@  #include "core/hle/kernel/writable_event.h"  #include "core/hle/service/nvdrv/nvdata.h" +namespace Kernel { +class KernelCore; +} +  namespace Service::NVFlinger {  struct IGBPBuffer { @@ -44,7 +48,7 @@ public:          NativeWindowFormat = 2,      }; -    BufferQueue(u32 id, u64 layer_id); +    explicit BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id);      ~BufferQueue();      enum class BufferTransformFlags : u32 { diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2e4d707b9..cc9522aad 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -83,7 +83,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {      const u64 layer_id = next_layer_id++;      const u32 buffer_queue_id = next_buffer_queue_id++; -    buffer_queues.emplace_back(buffer_queue_id, layer_id); +    buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id);      display->CreateLayer(layer_id, buffer_queues.back());      return layer_id;  } @@ -187,14 +187,18 @@ void NVFlinger::Compose() {          MicroProfileFlip();          if (!buffer) { -            // There was no queued buffer to draw, render previous frame -            system.GetPerfStats().EndGameFrame(); -            system.GPU().SwapBuffers({});              continue;          }          const auto& igbp_buffer = buffer->get().igbp_buffer; +        const auto& gpu = system.GPU(); +        const auto& multi_fence = 
buffer->get().multi_fence; +        for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { +            const auto& fence = multi_fence.fences[fence_id]; +            gpu.WaitFence(fence.id, fence.value); +        } +          // Now send the buffer to the GPU for drawing.          // TODO(Subv): Support more than just disp0. The display device selection is probably based          // on which display we're drawing (Default, Internal, External, etc) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  #include "common/assert.h" +#include "common/microprofile.h"  #include "core/core.h"  #include "core/core_timing.h"  #include "core/memory.h" @@ -17,6 +18,8 @@  namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); +  GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)      : system{system}, renderer{renderer}, is_async{is_async} {      auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {      return *dma_pusher;  } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { +    // Synced GPU, is always in sync +    if (!is_async) { +        return; +    } +    MICROPROFILE_SCOPE(GPU_wait); +    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { +    } +} +  void GPU::IncrementSyncPoint(const u32 syncpoint_id) {      syncpoints[syncpoint_id]++;      std::lock_guard lock{sync_mutex}; @@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {          block.sequence = regs.semaphore_sequence;          // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of          // CoreTiming -        block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); +        block.timestamp = system.CoreTiming().GetTicks();          
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,                                     sizeof(block));      } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,12 @@ public:      /// Returns a reference to the GPU DMA pusher.      Tegra::DmaPusher& DmaPusher(); +    // Waits for the GPU to finish working +    virtual void WaitIdle() const = 0; + +    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. +    void WaitFence(u32 syncpoint_id, u32 value) const; +      void IncrementSyncPoint(u32 syncpoint_id);      u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con      interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);  } +void GPUAsynch::WaitIdle() const { +    gpu_thread.WaitIdle(); +} +  } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public:      void FlushRegion(CacheAddr addr, u64 size) override;      void InvalidateRegion(CacheAddr addr, u64 size) override;      void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +    void WaitIdle() const override;  protected:      void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public:      void FlushRegion(CacheAddr addr, u64 size) override;      void InvalidateRegion(CacheAddr addr, u64 size) 
override;      void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +    void WaitIdle() const override {}  protected:      void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@  #include "common/assert.h"  #include "common/microprofile.h"  #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h"  #include "core/frontend/scope_acquire_window_context.h"  #include "video_core/dma_pusher.h"  #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {  void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {      thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; -    synchronization_event = system.CoreTiming().RegisterEvent( -        "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });  }  void ThreadManager::SubmitList(Tegra::CommandList&& entries) { -    const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; -    const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; -    system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); +    PushCommand(SubmitListCommand(std::move(entries)));  }  void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {      InvalidateRegion(addr, size);  } +void ThreadManager::WaitIdle() const { +    while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { +    } +} +  u64 ThreadManager::PushCommand(CommandData&& command_data) {      const u64 fence{++state.last_fence};      state.queue.Push(CommandDataContainer(std::move(command_data), fence));  
    return fence;  } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { -    while (signaled_fence.load() < fence) -        ; -} -  } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher;  namespace Core {  class System; -namespace Timing { -struct EventType; -} // namespace Timing  } // namespace Core  namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer {  struct SynchState final {      std::atomic_bool is_running{true}; -    void WaitForSynchronization(u64 fence); -      using CommandQueue = Common::SPSCQueue<CommandDataContainer>;      CommandQueue queue;      u64 last_fence{}; @@ -121,6 +116,9 @@ public:      /// Notify rasterizer that any caches of the specified region should be flushed and invalidated      void FlushAndInvalidateRegion(CacheAddr addr, u64 size); +    // Wait until the gpu thread is idle. 
+    void WaitIdle() const; +  private:      /// Pushes a command to be executed by the GPU thread      u64 PushCommand(CommandData&& command_data); @@ -128,7 +126,6 @@ private:  private:      SynchState state;      Core::System& system; -    Core::Timing::EventType* synchronization_event{};      std::thread thread;      std::thread::id thread_id;  }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a85f730a8..cbcf81414 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {  }  void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { +    std::lock_guard lock{pages_mutex};      const u64 page_start{addr >> Memory::PAGE_BITS};      const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@  #include <cstddef>  #include <map>  #include <memory> +#include <mutex>  #include <optional>  #include <tuple>  #include <utility> @@ -230,6 +231,8 @@ private:      using CachedPageMap = boost::icl::interval_map<u64, int>;      CachedPageMap cached_pages; + +    std::mutex pages_mutex;  };  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6a610a3bc..a3524a6a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1148,7 +1148,7 @@ private:          for (const auto& variant : extras) {              if (const auto argument = std::get_if<TextureArgument>(&variant)) {     
             expr += GenerateTextureArgument(*argument); -            } else if (std::get_if<TextureAoffi>(&variant)) { +            } else if (std::holds_alternative<TextureAoffi>(variant)) {                  expr += GenerateTextureAoffi(meta->aoffi);              } else {                  UNREACHABLE(); @@ -1158,8 +1158,8 @@ private:          return expr + ')';      } -    std::string GenerateTextureArgument(TextureArgument argument) { -        const auto [type, operand] = argument; +    std::string GenerateTextureArgument(const TextureArgument& argument) { +        const auto& [type, operand] = argument;          if (operand == nullptr) {              return {};          } @@ -1235,7 +1235,7 @@ private:      std::string BuildImageValues(Operation operation) {          constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; -        const auto meta{std::get<MetaImage>(operation.GetMeta())}; +        const auto& meta{std::get<MetaImage>(operation.GetMeta())};          const std::size_t values_count{meta.values.size()};          std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); @@ -1780,14 +1780,14 @@ private:              return {"0", Type::Int};          } -        const auto meta{std::get<MetaImage>(operation.GetMeta())}; +        const auto& meta{std::get<MetaImage>(operation.GetMeta())};          return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),                              BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),                  Type::Uint};      }      Expression ImageStore(Operation operation) { -        const auto meta{std::get<MetaImage>(operation.GetMeta())}; +        const auto& meta{std::get<MetaImage>(operation.GetMeta())};          code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),                       BuildIntegerCoordinates(operation), BuildImageValues(operation));          return {}; @@ -1795,7 +1795,7 @@ private:      template <const std::string_view& opname>  
    Expression AtomicImage(Operation operation) { -        const auto meta{std::get<MetaImage>(operation.GetMeta())}; +        const auto& meta{std::get<MetaImage>(operation.GetMeta())};          ASSERT(meta.values.size() == 1);          return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), @@ -2246,7 +2246,7 @@ private:          code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());      } -    std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { +    std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {          return fmt::format("{}_{}_{}", name, index, suffix);      } @@ -2271,17 +2271,15 @@ private:      ShaderWriter code;  }; -static constexpr std::string_view flow_var = "flow_var_"; -  std::string GetFlowVariable(u32 i) { -    return fmt::format("{}{}", flow_var, i); +    return fmt::format("flow_var_{}", i);  }  class ExprDecompiler {  public:      explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} -    void operator()(VideoCommon::Shader::ExprAnd& expr) { +    void operator()(const ExprAnd& expr) {          inner += "( ";          std::visit(*this, *expr.operand1);          inner += " && "; @@ -2289,7 +2287,7 @@ public:          inner += ')';      } -    void operator()(VideoCommon::Shader::ExprOr& expr) { +    void operator()(const ExprOr& expr) {          inner += "( ";          std::visit(*this, *expr.operand1);          inner += " || "; @@ -2297,17 +2295,17 @@ public:          inner += ')';      } -    void operator()(VideoCommon::Shader::ExprNot& expr) { +    void operator()(const ExprNot& expr) {          inner += '!';          std::visit(*this, *expr.operand1);      } -    void operator()(VideoCommon::Shader::ExprPredicate& expr) { +    void operator()(const ExprPredicate& expr) {          const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);          inner += decomp.GetPredicate(pred);      } -    void 
operator()(VideoCommon::Shader::ExprCondCode& expr) { +    void operator()(const ExprCondCode& expr) {          const Node cc = decomp.ir.GetConditionCode(expr.cc);          std::string target; @@ -2329,15 +2327,15 @@ public:          inner += target;      } -    void operator()(VideoCommon::Shader::ExprVar& expr) { +    void operator()(const ExprVar& expr) {          inner += GetFlowVariable(expr.var_index);      } -    void operator()(VideoCommon::Shader::ExprBoolean& expr) { +    void operator()(const ExprBoolean& expr) {          inner += expr.value ? "true" : "false";      } -    std::string& GetResult() { +    const std::string& GetResult() const {          return inner;      } @@ -2350,7 +2348,7 @@ class ASTDecompiler {  public:      explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} -    void operator()(VideoCommon::Shader::ASTProgram& ast) { +    void operator()(const ASTProgram& ast) {          ASTNode current = ast.nodes.GetFirst();          while (current) {              Visit(current); @@ -2358,7 +2356,7 @@ public:          }      } -    void operator()(VideoCommon::Shader::ASTIfThen& ast) { +    void operator()(const ASTIfThen& ast) {          ExprDecompiler expr_parser{decomp};          std::visit(expr_parser, *ast.condition);          decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); @@ -2372,7 +2370,7 @@ public:          decomp.code.AddLine("}}");      } -    void operator()(VideoCommon::Shader::ASTIfElse& ast) { +    void operator()(const ASTIfElse& ast) {          decomp.code.AddLine("else {{");          decomp.code.scope++;          ASTNode current = ast.nodes.GetFirst(); @@ -2384,29 +2382,29 @@ public:          decomp.code.AddLine("}}");      } -    void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { +    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {          UNREACHABLE();      } -    void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { +    void operator()(const ASTBlockDecoded& ast) 
{          decomp.VisitBlock(ast.nodes);      } -    void operator()(VideoCommon::Shader::ASTVarSet& ast) { +    void operator()(const ASTVarSet& ast) {          ExprDecompiler expr_parser{decomp};          std::visit(expr_parser, *ast.condition);          decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());      } -    void operator()(VideoCommon::Shader::ASTLabel& ast) { +    void operator()(const ASTLabel& ast) {          decomp.code.AddLine("// Label_{}:", ast.index);      } -    void operator()(VideoCommon::Shader::ASTGoto& ast) { +    void operator()([[maybe_unused]] const ASTGoto& ast) {          UNREACHABLE();      } -    void operator()(VideoCommon::Shader::ASTDoWhile& ast) { +    void operator()(const ASTDoWhile& ast) {          ExprDecompiler expr_parser{decomp};          std::visit(expr_parser, *ast.condition);          decomp.code.AddLine("do {{"); @@ -2420,7 +2418,7 @@ public:          decomp.code.AddLine("}} while({});", expr_parser.GetResult());      } -    void operator()(VideoCommon::Shader::ASTReturn& ast) { +    void operator()(const ASTReturn& ast) {          const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);          if (!is_true) {              ExprDecompiler expr_parser{decomp}; @@ -2440,7 +2438,7 @@ public:          }      } -    void operator()(VideoCommon::Shader::ASTBreak& ast) { +    void operator()(const ASTBreak& ast) {          const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);          if (!is_true) {              ExprDecompiler expr_parser{decomp}; @@ -2455,7 +2453,7 @@ public:          }      } -    void Visit(VideoCommon::Shader::ASTNode& node) { +    void Visit(const ASTNode& node) {          std::visit(*this, *node->GetInnerData());      } @@ -2468,9 +2466,9 @@ void GLSLDecompiler::DecompileAST() {      for (u32 i = 0; i < num_flow_variables; i++) {          code.AddLine("bool {} = false;", GetFlowVariable(i));      } +      ASTDecompiler decompiler{*this}; -  
  VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); -    decompiler.Visit(program); +    decompiler.Visit(ir.GetASTProgram());  }  } // Anonymous namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst  RendererOpenGL::~RendererOpenGL() = default;  void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { -    system.GetPerfStats().EndSystemFrame(); -      // Maintain the rasterizer's state as a priority      OpenGLState prev_state = OpenGLState::GetCurState();      state.AllDirty(); @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {      render_window.PollEvents(); -    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); -    system.GetPerfStats().BeginSystemFrame(); -      // Restore the rasterizer state      prev_state.AllDirty();      prev_state.Apply(); diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..447fb5c1d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -410,7 +410,7 @@ public:      explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}      explicit OperationNode(OperationCode code, Meta meta) -        : OperationNode(code, meta, std::vector<Node>{}) {} +        : OperationNode(code, std::move(meta), std::vector<Node>{}) {}      explicit OperationNode(OperationCode code, std::vector<Node> operands)          : OperationNode(code, Meta{}, std::move(operands)) {} diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 250afc6d6..ef6b3592e 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -252,6 +252,7 
@@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,          default:              break;          } +        break;      case Tegra::Texture::TextureFormat::R32_G32_B32_A32:          switch (component_type) {          case Tegra::Texture::ComponentType::FLOAT: diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 683c49207..829268b4c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -2,6 +2,7 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include "common/algorithm.h"  #include "common/assert.h"  #include "common/common_types.h"  #include "common/microprofile.h" diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5e497e49f..1bed82898 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -4,12 +4,11 @@  #pragma once -#include <algorithm> +#include <optional> +#include <tuple>  #include <unordered_map>  #include <vector> -#include "common/assert.h" -#include "common/binary_find.h"  #include "common/common_types.h"  #include "video_core/gpu.h"  #include "video_core/morton.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8caa041be..6a92b22d3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -403,7 +403,7 @@ private:          if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {              return RecycleStrategy::Flush;          } -        for (auto s : overlaps) { +        for (const auto& s : overlaps) {              const auto& s_params = s->GetSurfaceParams();              if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {                  return RecycleStrategy::Flush; @@ -584,7 
+584,7 @@ private:          } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {              return {};          } -        for (auto surface : overlaps) { +        for (const auto& surface : overlaps) {              Unregister(surface);          }          new_surface->MarkAsModified(modified, Tick()); | 
