video_core: Block in WaitFence.

This function is called rarely, but when it is called it often blocks for a long time. Instead of busy-waiting and wasting power, let the CPU sleep until the syncpoint is reached. This might also improve performance, as the other cores may be allowed to clock higher.
author: Markus Wick <markus@selfnet.de> 2019-12-30 13:03:20 +0100
committer: Markus Wick <markus@selfnet.de> 2019-12-30 13:04:53 +0100
commit: cb9dd01ffd3f54f5592330e3a37e2b26975bf209 (patch)
tree: dc41e5bea7e4844fceebbbb2d7481eb026bfcfd9 /src
parent: f846e3d6d0e973485a53bb87b913059060dcfdbc (diff)
3 files changed, 9 insertions, 5 deletions
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 52623cf89..9810d2c64 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -192,7 +192,7 @@ void NVFlinger::Compose() {
 
         const auto& igbp_buffer = buffer->get().igbp_buffer;
 
-        const auto& gpu = system.GPU();
+        auto& gpu = system.GPU();
         const auto& multi_fence = buffer->get().multi_fence;
         for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
             const auto& fence = multi_fence.fences[fence_id];
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 095660115..b9c5c41a2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
     return *dma_pusher;
 }
 
-void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     // Synced GPU, is always in sync
     if (!is_async) {
         return;
     }
     MICROPROFILE_SCOPE(GPU_wait);
-    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
-    }
+    std::unique_lock lock{sync_mutex};
+    sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
 }
 
 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
     syncpoints[syncpoint_id]++;
     std::lock_guard lock{sync_mutex};
+    sync_cv.notify_all();
     if (!syncpt_interrupts[syncpoint_id].empty()) {
         u32 value = syncpoints[syncpoint_id].load();
         auto it = syncpt_interrupts[syncpoint_id].begin();
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ecc338ae9..b648317bb 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <atomic>
+#include <condition_variable>
 #include <list>
 #include <memory>
 #include <mutex>
@@ -181,7 +182,7 @@ public:
     virtual void WaitIdle() const = 0;
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
-    void WaitFence(u32 syncpoint_id, u32 value) const;
+    void WaitFence(u32 syncpoint_id, u32 value);
 
     void IncrementSyncPoint(u32 syncpoint_id);
 
@@ -312,6 +313,8 @@ private:
 
     std::mutex sync_mutex;
 
+    std::condition_variable sync_cv;
+
     const bool is_async;
 };
author	Markus Wick <markus@selfnet.de>	2019-12-30 13:03:20 +0100
committer	Markus Wick <markus@selfnet.de>	2019-12-30 13:04:53 +0100
commit	cb9dd01ffd3f54f5592330e3a37e2b26975bf209 (patch)
tree	dc41e5bea7e4844fceebbbb2d7481eb026bfcfd9 /src
parent	f846e3d6d0e973485a53bb87b913059060dcfdbc (diff)