diff options
| author | ameerj <52414509+ameerj@users.noreply.github.com> | 2020-12-28 01:02:06 -0500 | 
|---|---|---|
| committer | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-01-07 14:33:45 -0500 | 
| commit | 2c27127d04a155fe0f893e84263d58f14473785d (patch) | |
| tree | e72b7d973f5c0dd4a553f815a632bf8fcc687998 | |
| parent | bcb702fa3e9d3f11748082f2ae9a5c1986b1ad8a (diff) | |
nvdec syncpt incorporation
Laying the groundwork for async GPU, although this does not fully implement async nvdec operations.
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 5 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 3 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 26 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 14 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 5 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_vic.h | 4 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/nvdrv.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.h | 10 | ||||
| -rw-r--r-- | src/video_core/command_classes/host1x.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/command_classes/sync_manager.cpp | 2 | 
11 files changed, 59 insertions, 37 deletions
| diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index d8735491c..36970f828 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -11,8 +11,9 @@  namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) -    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, +                           SyncpointManager& syncpoint_manager) +    : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}  nvhost_nvdec::~nvhost_nvdec() = default;  NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 79b8b6de1..77ef53cdd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices {  class nvhost_nvdec final : public nvhost_nvdec_common {  public: -    explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); +    explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, +                          SyncpointManager& syncpoint_manager);      ~nvhost_nvdec() override;      NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b49cecb42..64370ad4c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -11,6 +11,7 @@  #include "core/core.h"  #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"  #include 
"core/hle/service/nvdrv/devices/nvmap.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h"  #include "core/memory.h"  #include "video_core/memory_manager.h"  #include "video_core/renderer_base.h" @@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s  }  } // Anonymous namespace -nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) -    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, +                                         SyncpointManager& syncpoint_manager) +    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}  nvhost_nvdec_common::~nvhost_nvdec_common() = default;  NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { @@ -71,10 +73,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u      offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);      offset = SpliceVectors(input, fences, params.fence_count, offset); -    // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment +    for (std::size_t i = 0; i < syncpt_increments.size(); i++) { +        SyncptIncr syncpt_incr = syncpt_increments[i]; +        fences[i].id = syncpt_incr.id; +        fences[i].value = +            syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments); +    }      auto& gpu = system.GPU(); -      for (const auto& cmd_buffer : command_buffers) {          auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);          ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); @@ -89,6 +95,10 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u                                        cmdlist.size() * sizeof(u32));          gpu.PushCommandBuffer(cmdlist);      } +    fences[0].value = 
syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1); + +    Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}}; +    gpu.PushCommandBuffer(cmdlist);      std::memcpy(output.data(), &params, sizeof(IoctlSubmit));      // Some games expect command_buffers to be written back @@ -98,6 +108,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u      offset = WriteVectors(output, reloc_shifts, offset);      offset = WriteVectors(output, syncpt_increments, offset);      offset = WriteVectors(output, wait_checks, offset); +    offset = WriteVectors(output, fences, offset);      return NvResult::Success;  } @@ -107,9 +118,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve      std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));      LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); -    // We found that implementing this causes deadlocks with async gpu, along with degraded -    // performance. TODO: RE the nvdec async implementation -    params.value = 0; +    if (device_syncpoints[params.param] == 0) { +        device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); +    } +    params.value = device_syncpoints[params.param];      std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));      return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index d9f95ba58..4c9d4ba41 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -10,12 +10,16 @@  #include "common/swap.h"  #include "core/hle/service/nvdrv/devices/nvdevice.h" -namespace Service::Nvidia::Devices { +namespace Service::Nvidia { +class SyncpointManager; + +namespace Devices {  class nvmap;  class nvhost_nvdec_common : public nvdevice {  public: -    explicit nvhost_nvdec_common(Core::System& system, 
std::shared_ptr<nvmap> nvmap_dev); +    explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, +                                 SyncpointManager& syncpoint_manager);      ~nvhost_nvdec_common() override;  protected: @@ -157,8 +161,10 @@ protected:      s32_le nvmap_fd{};      u32_le submit_timeout{};      std::shared_ptr<nvmap> nvmap_dev; - +    SyncpointManager& syncpoint_manager; +    std::array<u32, MaxSyncPoints> device_syncpoints{};      // This is expected to be ordered, therefore we must use a map, not unordered_map      std::map<GPUVAddr, BufferMap> buffer_mappings;  }; -}; // namespace Service::Nvidia::Devices +}; // namespace Devices +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 805fe86ae..72499654c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -10,8 +10,9 @@  #include "video_core/renderer_base.h"  namespace Service::Nvidia::Devices { -nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) -    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, +                       SyncpointManager& syncpoint_manager) +    : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}  nvhost_vic::~nvhost_vic() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index b2e11f4d4..f401c61fa 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -7,11 +7,11 @@  #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"  namespace Service::Nvidia::Devices { -class nvmap;  class nvhost_vic final : public nvhost_nvdec_common {  public: -    explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> 
nvmap_dev); +    explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, +                        SyncpointManager& syncpoint_manager);      ~nvhost_vic();      NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index e03195afe..620c18728 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {      devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);      devices["/dev/nvhost-ctrl"] =          std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); -    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); +    devices["/dev/nvhost-nvdec"] = +        std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);      devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); -    devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); +    devices["/dev/nvhost-vic"] = +        std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);  }  Module::~Module() = default; diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index e3e7432f7..94679d5d1 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)      : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),        vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),        host1x_processor(std::make_unique<Host1x>(gpu)), -      nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), -      vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {} +      sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}  CDmaPusher::~CDmaPusher() = 
default; @@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {              const auto syncpoint_id = static_cast<u32>(data & 0xFF);              const auto cond = static_cast<u32>((data >> 8) & 0xFF);              if (cond == 0) { -                nvdec_sync->Increment(syncpoint_id); +                sync_manager->Increment(syncpoint_id);              } else { -                nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); -                nvdec_sync->SignalDone(syncpoint_id); +                sync_manager->SignalDone( +                    sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));              }              break;          } @@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {              const auto syncpoint_id = static_cast<u32>(data & 0xFF);              const auto cond = static_cast<u32>((data >> 8) & 0xFF);              if (cond == 0) { -                vic_sync->Increment(syncpoint_id); +                sync_manager->Increment(syncpoint_id);              } else { -                vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); -                vic_sync->SignalDone(syncpoint_id); +                sync_manager->SignalDone( +                    sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));              }              break;          } diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 0db1cd646..8ca70b6dd 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -116,12 +116,10 @@ private:      void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);      GPU& gpu; - -    std::shared_ptr<Nvdec> nvdec_processor; -    std::unique_ptr<Vic> vic_processor; -    std::unique_ptr<Host1x> host1x_processor; -    std::unique_ptr<SyncptIncrManager> nvdec_sync; -    
std::unique_ptr<SyncptIncrManager> vic_sync; +    std::shared_ptr<Tegra::Nvdec> nvdec_processor; +    std::unique_ptr<Tegra::Vic> vic_processor; +    std::unique_ptr<Tegra::Host1x> host1x_processor; +    std::unique_ptr<SyncptIncrManager> sync_manager;      ChClassId current_class{};      ThiRegisters vic_thi_state{};      ThiRegisters nvdec_thi_state{}; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index c4dd4881a..9d0a1b4d9 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -34,6 +34,8 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen  }  void Tegra::Host1x::Execute(u32 data) { -    // This method waits on a valid syncpoint. -    // TODO: Implement when proper Async is in place +    u32 syncpointId = (data & 0xFF); +    u32 threshold = state.load_syncpoint_payload32; + +    gpu.WaitFence(syncpointId, threshold);  } diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index 19dc9e0ab..579857766 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {  }  void SyncptIncrManager::SignalDone(u32 handle) { -    const auto done_incr = +    const auto& done_incr =          std::find_if(increments.begin(), increments.end(),                       [handle](const SyncptIncr& incr) { return incr.id == handle; });      if (done_incr != increments.cend()) { | 
