video_core: Move command buffer loop.

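This moves the hot loop over the submitted GPFIFO entries out of nvhost_gpu and into video_core: the whole entry list is now handed over in a single ProcessCommandLists call instead of calling ProcessCommandList once per entry. This refactoring should reduce the CPU overhead of calling ProcessCommandList.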
author: Markus Wick <markus@selfnet.de> 2018-09-06 15:48:08 +0200
committer: Markus Wick <markus@selfnet.de> 2018-09-10 22:06:13 +0200
commit: 0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 (patch)
tree: c6fdc90795dc4e1851e2b3e3bd792e48f19251ba /src/core/hle
parent: c5600435817ff751d2a5550ded54a033ccf0c15b (diff)
2 files changed, 12 insertions, 31 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 4cdf7f613..8e0f9a9e5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -8,6 +8,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/memory.h"
+#include "video_core/command_processor.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
@@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
                 params.address, params.num_entries, params.flags);
 
-    ASSERT_MSG(input.size() ==
-                   sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry),
+    ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
+                                   params.num_entries * sizeof(Tegra::CommandListHeader),
                "Incorrect input size");
 
-    std::vector<IoctlGpfifoEntry> entries(params.num_entries);
+    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
-                params.num_entries * sizeof(IoctlGpfifoEntry));
-    for (auto entry : entries) {
-        Tegra::GPUVAddr va_addr = entry.Address();
-        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
-    }
+                params.num_entries * sizeof(Tegra::CommandListHeader));
+
+    Core::System::GetInstance().GPU().ProcessCommandLists(entries);
+
     params.fence_out.id = 0;
     params.fence_out.value = 0;
     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
@@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
                 params.address, params.num_entries, params.flags);
 
-    std::vector<IoctlGpfifoEntry> entries(params.num_entries);
+    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
     Memory::ReadBlock(params.address, entries.data(),
-                      params.num_entries * sizeof(IoctlGpfifoEntry));
+                      params.num_entries * sizeof(Tegra::CommandListHeader));
+
+    Core::System::GetInstance().GPU().ProcessCommandLists(entries);
 
-    for (auto entry : entries) {
-        Tegra::GPUVAddr va_addr = entry.Address();
-        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
-    }
     params.fence_out.id = 0;
     params.fence_out.value = 0;
     std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 03b7356d0..baaefd79a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
-#include "video_core/memory_manager.h"
 
 namespace Service::Nvidia::Devices {
 
@@ -151,22 +150,6 @@ private:
     };
     static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
 
-    struct IoctlGpfifoEntry {
-        u32_le entry0; // gpu_va_lo
-        union {
-            u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
-            BitField<0, 8, u32_le> gpu_va_hi;
-            BitField<8, 2, u32_le> unk1;
-            BitField<10, 21, u32_le> sz;
-            BitField<31, 1, u32_le> unk2;
-        };
-
-        Tegra::GPUVAddr Address() const {
-            return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
-        }
-    };
-    static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
-
     struct IoctlSubmitGpfifo {
         u64_le address;     // pointer to gpfifo entry structs
         u32_le num_entries; // number of fence objects being submitted
author	Markus Wick <markus@selfnet.de>	2018-09-06 15:48:08 +0200
committer	Markus Wick <markus@selfnet.de>	2018-09-10 22:06:13 +0200
commit	0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 (patch)
tree	c6fdc90795dc4e1851e2b3e3bd792e48f19251ba /src/core/hle
parent	c5600435817ff751d2a5550ded54a033ccf0c15b (diff)