diff options
| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-02-11 18:59:44 -0300 | 
|---|---|---|
| committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-02-14 17:38:27 -0300 | 
| commit | bcd348f2388cf944f2ac49364a8d13b47cc21456 (patch) | |
| tree | 7aefb0077b4d8902bdab3f3026361173a71046e3 | |
| parent | c31382ced54c07650ae41fa2f75dc53da894784e (diff) | |

vk_query_cache: Implement generic query cache on Vulkan

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 |
| -rw-r--r-- | src/video_core/query_cache.h | 37 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 11 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.h | 11 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 10 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 122 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.h | 104 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 21 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 6 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.cpp | 8 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.h | 15 |

11 files changed, 327 insertions, 20 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index bb5895e99..4b0c6346f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -180,6 +180,8 @@ if (ENABLE_VULKAN)          renderer_vulkan/vk_memory_manager.h          renderer_vulkan/vk_pipeline_cache.cpp          renderer_vulkan/vk_pipeline_cache.h +        renderer_vulkan/vk_query_cache.cpp +        renderer_vulkan/vk_query_cache.h          renderer_vulkan/vk_rasterizer.cpp          renderer_vulkan/vk_rasterizer.h          renderer_vulkan/vk_renderpass_cache.cpp diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 4c9151ce8..069032121 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -88,7 +88,8 @@ private:      std::shared_ptr<HostCounter> last;  }; -template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> +template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, +          class QueryPool>  class QueryCacheBase {  public:      explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) @@ -127,15 +128,25 @@ public:      /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.      void UpdateCounters() { +        std::unique_lock lock{mutex};          const auto& regs = system.GPU().Maxwell3D().regs;          Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);      }      /// Resets a counter to zero. It doesn't disable the query after resetting.      void ResetCounter(VideoCore::QueryType type) { +        std::unique_lock lock{mutex};          Stream(type).Reset();      } +    /// Disable all active streams. Expected to be called at the end of a command buffer. 
+    void DisableStreams() { +        std::unique_lock lock{mutex}; +        for (auto& stream : streams) { +            stream.Update(false); +        } +    } +      /// Returns a new host counter.      std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,                                           VideoCore::QueryType type) { @@ -148,6 +159,9 @@ public:          return streams[static_cast<std::size_t>(type)];      } +protected: +    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; +  private:      /// Flushes a memory range to guest memory and removes it from the cache.      void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { @@ -213,8 +227,16 @@ private:  template <class QueryCache, class HostCounter>  class HostCounterBase {  public: -    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency) -        : dependency{std::move(dependency)} {} +    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_) +        : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { +        // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. +        static constexpr u64 depth_threshold = 96; +        if (depth > depth_threshold) { +            depth = 0; +            base_result = dependency->Query(); +            dependency = nullptr; +        } +    }      /// Returns the current value of the query.      
u64 Query() { @@ -222,9 +244,10 @@ public:              return *result;          } -        u64 value = BlockingQuery(); +        u64 value = BlockingQuery() + base_result;          if (dependency) {              value += dependency->Query(); +            dependency = nullptr;          }          return *(result = value); @@ -235,6 +258,10 @@ public:          return result.has_value();      } +    u64 Depth() const noexcept { +        return depth; +    } +  protected:      /// Returns the value of query from the backend API blocking as needed.      virtual u64 BlockingQuery() const = 0; @@ -242,6 +269,8 @@ protected:  private:      std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.      std::optional<u64> result;               ///< Filled with the already returned value. +    u64 depth;                               ///< Number of nested dependencies. +    u64 base_result = 0;                     ///< Equivalent to nested dependencies value.  };  template <class HostCounter> diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 7d5a044c7..f12e9f55f 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -31,15 +31,16 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {  } // Anonymous namespace  QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) -    : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, -                                  HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>( -                                                           gl_rasterizer)}, +    : VideoCommon::QueryCacheBase< +          QueryCache, CachedQuery, CounterStream, HostCounter, +          std::vector<OGLQuery>>{system, +                                 static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},        gl_rasterizer{gl_rasterizer} {}  
QueryCache::~QueryCache() = default;  OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { -    auto& reserve = queries_reserve[static_cast<std::size_t>(type)]; +    auto& reserve = query_pools[static_cast<std::size_t>(type)];      OGLQuery query;      if (reserve.empty()) {          query.Create(GetTarget(type)); @@ -52,7 +53,7 @@ OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {  }  void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { -    queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query)); +    query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));  }  bool QueryCache::AnyCommandQueued() const noexcept { diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 20d337f15..99d187837 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -6,12 +6,8 @@  #include <array>  #include <memory> -#include <optional> -#include <unordered_map>  #include <vector> -#include <glad/glad.h> -  #include "common/common_types.h"  #include "video_core/query_cache.h"  #include "video_core/rasterizer_interface.h" @@ -30,8 +26,8 @@ class RasterizerOpenGL;  using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; -class QueryCache final -    : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { +class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, +                                                            HostCounter, std::vector<OGLQuery>> {  public:      explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);      ~QueryCache(); @@ -44,7 +40,6 @@ public:  private:      RasterizerOpenGL& gl_rasterizer; -    std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;  };  class HostCounter final : public 
VideoCommon::HostCounterBase<QueryCache, HostCounter> { @@ -59,7 +54,7 @@ private:      u64 BlockingQuery() const override;      QueryCache& cache; -    VideoCore::QueryType type; +    const VideoCore::QueryType type;      OGLQuery query;  }; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9840f26e5..588a6835f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan      features.depthBiasClamp = true;      features.geometryShader = true;      features.tessellationShader = true; +    features.occlusionQueryPrecise = true;      features.fragmentStoresAndAtomics = true;      features.shaderImageGatherExtended = true;      features.shaderStorageImageWriteWithoutFormat = true; @@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan      bit8_storage.uniformAndStorageBuffer8BitAccess = true;      SetNext(next, bit8_storage); +    vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; +    host_query_reset.hostQueryReset = true; +    SetNext(next, host_query_reset); +      vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;      if (is_float16_supported) {          float16_int8.shaderFloat16 = true; @@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev          VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,          VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,          VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, +        VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,      };      std::bitset<required_extensions.size()> available_extensions{}; @@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev          std::make_pair(features.depthBiasClamp, "depthBiasClamp"),          std::make_pair(features.geometryShader, 
"geometryShader"),          std::make_pair(features.tessellationShader, "tessellationShader"), +        std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),          std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),          std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),          std::make_pair(features.shaderStorageImageWriteWithoutFormat, @@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami          }      }; -    extensions.reserve(13); +    extensions.reserve(14);      extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);      extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);      extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami      extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);      extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);      extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); +    extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);      [[maybe_unused]] const bool nsight =          std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 000000000..ffbf60dda --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +namespace { + +constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; + +constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { +    return QUERY_TARGETS[static_cast<std::size_t>(type)]; +} + +} // Anonymous namespace + +QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} + +QueryPool::~QueryPool() = default; + +void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { +    device = &device_; +    type = type_; +} + +std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { +    std::size_t index; +    do { +        index = CommitResource(fence); +    } while (usage[index]); +    usage[index] = true; + +    return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; +} + +void QueryPool::Allocate(std::size_t begin, std::size_t end) { +    usage.resize(end); + +    const auto dev = device->GetLogical(); +    const u32 size = static_cast<u32>(end - begin); +    const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); +    pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); +} + +void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { +    const auto it = +        std::find_if(std::begin(pools), std::end(pools), +                     [query_pool = query.first](auto& pool) { return query_pool == *pool; }); +    ASSERT(it != std::end(pools)); + +    const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); +    usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; +} 
+ +VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +                           const VKDevice& device, VKScheduler& scheduler) +    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, +                                  QueryPool>{system, rasterizer}, +      device{device}, scheduler{scheduler} { +    for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { +        query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); +    } +} + +VKQueryCache::~VKQueryCache() = default; + +std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { +    return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); +} + +void VKQueryCache::Reserve(VideoCore::QueryType type, +                           std::pair<vk::QueryPool, std::uint32_t> query) { +    query_pools[static_cast<std::size_t>(type)].Reserve(query); +} + +HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, +                         VideoCore::QueryType type) +    : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, +      type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { +    const auto dev = cache.Device().GetLogical(); +    cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { +        dev.resetQueryPoolEXT(query.first, query.second, 1, dld); +        cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); +    }); +} + +HostCounter::~HostCounter() { +    cache.Reserve(type, query); +} + +void HostCounter::EndQuery() { +    cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { +        cmdbuf.endQuery(query.first, query.second, dld); +    }); +} + +u64 HostCounter::BlockingQuery() const { +    if (ticks >= cache.Scheduler().Ticks()) { +        
cache.Scheduler().Flush(); +    } + +    const auto dev = cache.Device().GetLogical(); +    const auto& dld = cache.Device().GetDispatchLoader(); +    u64 value; +    dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), +                            vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); +    return value; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 000000000..c3092ee96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -0,0 +1,104 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class CachedQuery; +class HostCounter; +class VKDevice; +class VKQueryCache; +class VKScheduler; + +using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; + +class QueryPool final : public VKFencedPool { +public: +    explicit QueryPool(); +    ~QueryPool() override; + +    void Initialize(const VKDevice& device, VideoCore::QueryType type); + +    std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); + +    void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); + +protected: +    void Allocate(std::size_t begin, std::size_t end) override; + +private: +    static constexpr std::size_t GROW_STEP = 512; + +    const VKDevice* device = nullptr; +    VideoCore::QueryType type = {}; + +    std::vector<UniqueQueryPool> pools; +    std::vector<bool> usage; +}; + +class VKQueryCache final + 
   : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, +                                         QueryPool> { +public: +    explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +                          const VKDevice& device, VKScheduler& scheduler); +    ~VKQueryCache(); + +    std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); + +    void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); + +    const VKDevice& Device() const noexcept { +        return device; +    } + +    VKScheduler& Scheduler() const noexcept { +        return scheduler; +    } + +private: +    const VKDevice& device; +    VKScheduler& scheduler; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { +public: +    explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, +                         VideoCore::QueryType type); +    ~HostCounter(); + +    void EndQuery(); + +private: +    u64 BlockingQuery() const override; + +    VKQueryCache& cache; +    const VideoCore::QueryType type; +    const std::pair<vk::QueryPool, std::uint32_t> query; +    const u64 ticks; +}; + +class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { +public: +    explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) +        : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aada38702..79aa121ed 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind                      staging_pool),        pipeline_cache(system, *this, device, scheduler, 
descriptor_pool, update_descriptor_queue),        buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), -      sampler_cache(device) {} +      sampler_cache(device), query_cache(system, *this, device, scheduler) { +    scheduler.SetQueryCache(query_cache); +}  RasterizerVulkan::~RasterizerVulkan() = default; @@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {      FlushWork(); +    query_cache.UpdateCounters(); +      const auto& gpu = system.GPU().Maxwell3D();      GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; @@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {  void RasterizerVulkan::Clear() {      MICROPROFILE_SCOPE(Vulkan_Clearing); +    query_cache.UpdateCounters(); +      const auto& gpu = system.GPU().Maxwell3D();      if (!system.GPU().Maxwell3D().ShouldExecute()) {          return; @@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {      sampled_views.clear();      image_views.clear(); +    query_cache.UpdateCounters(); +      const auto& launch_desc = system.GPU().KeplerCompute().launch_description;      const ComputePipelineCacheKey key{          code_addr, @@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {      });  } +void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { +    query_cache.ResetCounter(type); +} + +void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, +                             std::optional<u64> timestamp) { +    query_cache.Query(gpu_addr, type, timestamp); +} +  void RasterizerVulkan::FlushAll() {}  void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {      texture_cache.FlushRegion(addr, size);      buffer_cache.FlushRegion(addr, size); +    query_cache.FlushRegion(addr, size);  }  void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {      texture_cache.InvalidateRegion(addr, size);      
pipeline_cache.InvalidateRegion(addr, size);      buffer_cache.InvalidateRegion(addr, size); +    query_cache.InvalidateRegion(addr, size);  }  void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7be71e734..add1ad88c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,6 +24,7 @@  #include "video_core/renderer_vulkan/vk_descriptor_pool.h"  #include "video_core/renderer_vulkan/vk_memory_manager.h"  #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_query_cache.h"  #include "video_core/renderer_vulkan/vk_renderpass_cache.h"  #include "video_core/renderer_vulkan/vk_resource_manager.h"  #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -96,7 +97,7 @@ struct ImageView {      vk::ImageLayout* layout = nullptr;  }; -class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {  public:      explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,                                VKScreenInfo& screen_info, const VKDevice& device, @@ -108,6 +109,8 @@ public:      bool DrawMultiBatch(bool is_indexed) override;      void Clear() override;      void DispatchCompute(GPUVAddr code_addr) override; +    void ResetCounter(VideoCore::QueryType type) override; +    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;      void FlushAll() override;      void FlushRegion(CacheAddr addr, u64 size) override;      void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -247,6 +250,7 @@ private:      VKPipelineCache pipeline_cache;      VKBufferCache buffer_cache;      VKSamplerCache sampler_cache; +    VKQueryCache query_cache;      std::array<View, 
Maxwell::NumRenderTargets> color_attachments;      View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1..92bd6c344 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,6 +6,7 @@  #include "common/microprofile.h"  #include "video_core/renderer_vulkan/declarations.h"  #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h"  #include "video_core/renderer_vulkan/vk_resource_manager.h"  #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {  }  void VKScheduler::AllocateNewContext() { +    ++ticks; +      std::unique_lock lock{mutex};      current_fence = next_fence;      next_fence = &resource_manager.CommitFence(); @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {      current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);      current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},                           device.GetDispatchLoader()); +    // Enable counters once again. These are disabled when a command buffer is finished. 
+    if (query_cache) { +        query_cache->UpdateCounters(); +    }  }  void VKScheduler::InvalidateState() { @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {  }  void VKScheduler::EndPendingOperations() { +    query_cache->DisableStreams();      EndRenderPass();  } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0..62fd7858b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@  #pragma once +#include <atomic>  #include <condition_variable>  #include <memory>  #include <optional> @@ -18,6 +19,7 @@ namespace Vulkan {  class VKDevice;  class VKFence; +class VKQueryCache;  class VKResourceManager;  class VKFenceView { @@ -67,6 +69,11 @@ public:      /// Binds a pipeline to the current execution context.      void BindGraphicsPipeline(vk::Pipeline pipeline); +    /// Assigns the query cache. +    void SetQueryCache(VKQueryCache& query_cache_) { +        query_cache = &query_cache_; +    } +      /// Returns true when viewports have been set in the current command buffer.      bool TouchViewports() {          return std::exchange(state.viewports, true); @@ -112,6 +119,11 @@ public:          return current_fence;      } +    /// Returns the current command buffer tick. +    u64 Ticks() const { +        return ticks; +    } +  private:      class Command {      public: @@ -205,6 +217,8 @@ private:      const VKDevice& device;      VKResourceManager& resource_manager; +    VKQueryCache* query_cache = nullptr; +      vk::CommandBuffer current_cmdbuf;      VKFence* current_fence = nullptr;      VKFence* next_fence = nullptr; @@ -227,6 +241,7 @@ private:      Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;      std::mutex mutex;      std::condition_variable cv; +    std::atomic<u64> ticks = 0;      bool quit = false;  }; | 
