diff options
| -rw-r--r-- | src/video_core/CMakeLists.txt | 6 | ||||
| -rw-r--r-- | src/video_core/query_cache/bank_base.h | 106 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_base.h | 72 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache.h | 543 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache_base.h | 181 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_stream.h | 125 | ||||
| -rw-r--r-- | src/video_core/query_cache/types.h | 74 | 
7 files changed, 1107 insertions, 0 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9b13ccbab..cf9266d54 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -95,6 +95,12 @@ add_library(video_core STATIC      memory_manager.h      precompiled_headers.h      pte_kind.h +    query_cache/bank_base.h +    query_cache/query_base.h +    query_cache/query_cache_base.h +    query_cache/query_cache.h +    query_cache/query_stream.h +    query_cache/types.h      query_cache.h      rasterizer_accelerated.cpp      rasterizer_accelerated.h diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h new file mode 100644 index 000000000..4246a609d --- /dev/null +++ b/src/video_core/query_cache/bank_base.h @@ -0,0 +1,106 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <atomic> +#include <deque> +#include <utility> + + +#include "common/common_types.h" + +namespace VideoCommon { + +class BankBase { +protected: +    const size_t base_bank_size; +    size_t bank_size; +    std::atomic<size_t> references; +    size_t current_slot; + +public: +    BankBase(size_t bank_size_) +        : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} + +    virtual ~BankBase() = default; + +    virtual std::pair<bool, size_t> Reserve() { +        if (IsClosed()) { +            return {false, bank_size}; +        } +        const size_t result = current_slot++; +        return {true, result}; +    } + +    virtual void Reset() { +        current_slot = 0; +        references = 0; +        bank_size = base_bank_size; +    } + +    size_t Size() const { +        return bank_size; +    } + +    void AddReference(size_t how_many = 1) { +        references.fetch_add(how_many, std::memory_order_relaxed); +    } + +    void CloseReference(size_t how_many = 1) { +        if (how_many > 
references.load(std::memory_order_relaxed)) { +            UNREACHABLE(); +        } +        references.fetch_sub(how_many, std::memory_order_relaxed); +    } + +    void Close() { +        bank_size = current_slot; +    } + +    constexpr bool IsClosed() { +        return current_slot >= bank_size; +    } + +    bool IsDead() { +        return IsClosed() && references == 0; +    } +}; + +template <typename BankType> +class BankPool { +private: +    std::deque<BankType> bank_pool; +    std::deque<size_t> bank_indices; + +public: +    BankPool() = default; +    ~BankPool() = default; + +    // Reserve a bank from the pool and return its index. +    // If the oldest tracked bank is dead it is reset and reused; otherwise `builder` is +    // invoked with the pool and the index the new bank is expected to occupy. +    template <typename Func> +    size_t ReserveBank(Func&& builder) { +        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) { +            size_t new_index = bank_indices.front(); +            bank_indices.pop_front(); +            bank_pool[new_index].Reset(); +            // Re-queue the recycled bank so it can be reclaimed again once it dies. +            bank_indices.push_back(new_index); +            return new_index; +        } +        size_t new_index = bank_pool.size(); +        builder(bank_pool, new_index); +        bank_indices.push_back(new_index); +        return new_index; +    } + +    // Get a reference to a bank using its index +    BankType& GetBank(size_t index) { +        return bank_pool[index]; +    } + +    // Get the total number of banks in the pool +    size_t BankCount() const { +        return bank_pool.size(); +    } +}; + +} // namespace VideoCommon diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h new file mode 100644 index 000000000..485ed669c --- /dev/null +++ b/src/video_core/query_cache/query_base.h @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace VideoCommon { + +enum class QueryFlagBits : u32 { +    HasTimestamp = 1 << 0,       ///< Indicates if this query has a timestamp. 
+    IsFinalValueSynced = 1 << 1, ///< Indicates if the final value of the query has been synced +    IsHostSynced = 1 << 2,       ///< Indicates if the query has been synced in the host +    IsGuestSynced = 1 << 3,      ///< Indicates if the query has been synced with the guest. +    IsHostManaged = 1 << 4,      ///< Indicates if this query points to a host query +    IsRewritten = 1 << 5,        ///< Indicates if this query was rewritten by another query +    IsInvalidated = 1 << 6,      ///< Indicates the value of the query has been nullified. +    IsOrphan = 1 << 7,           ///< Indicates the query has not been set by a guest query. +    IsFence = 1 << 8,            ///< Indicates the query is a fence. +}; +DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) + +class QueryBase { +public: +    VAddr guest_address; +    QueryFlagBits flags; +    u64 value; + +protected: +    // Default constructor +    QueryBase() : guest_address(0), flags{}, value{} {} + +    // Parameterized constructor +    QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) +        : guest_address(address), flags(flags_), value{value_} {} +}; + +class GuestQuery : public QueryBase { +public: +    // Parameterized constructor +    GuestQuery(bool isLong, VAddr address, u64 queryValue) +        : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) { +        if (isLong) { +            flags |= QueryFlagBits::HasTimestamp; +        } +    } +}; + +class HostQueryBase : public QueryBase { +public: +    // Default constructor +    HostQueryBase() +        : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, +          size_banks{}, start_slot{}, size_slots{} {} + +    // Parameterized constructor +    HostQueryBase(bool isLong, VAddr address) +        : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, +          start_slot{}, size_slots{} { +        if (isLong) { +            flags |= QueryFlagBits::HasTimestamp; +    
    } +    } + +    u32 start_bank_id; +    u32 size_banks; +    size_t start_slot; +    size_t size_slots; +}; + +} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h new file mode 100644 index 000000000..f6af48d14 --- /dev/null +++ b/src/video_core/query_cache/query_cache.h @@ -0,0 +1,543 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <array> +#include <deque> +#include <memory> +#include <mutex> +#include <unordered_map> +#include <utility> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "core/memory.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/query_cache/bank_base.h" +#include "video_core/query_cache/query_base.h" +#include "video_core/query_cache/query_cache_base.h" +#include "video_core/query_cache/query_stream.h" +#include "video_core/query_cache/types.h" + +namespace VideoCommon { + +using Maxwell = Tegra::Engines::Maxwell3D; + +struct SyncValuesStruct { +    VAddr address; +    u64 value; +    u64 size; + +    static constexpr bool GeneratesBaseBuffer = true; +}; + +template <typename Traits> +class GuestStreamer : public SimpleStreamer<GuestQuery> { +public: +    using RuntimeType = typename Traits::RuntimeType; + +    GuestStreamer(size_t id_, RuntimeType& runtime_) +        : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {} + +    virtual ~GuestStreamer() = default; + +    size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, +                        std::optional<u32> subreport = std::nullopt) override { +        auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value)); +        pending_sync.push_back(new_id); +        return new_id; +    } + +    bool HasPendingSync() override { +        return !pending_sync.empty(); +    } + +    void SyncWrites() 
override { +        if (pending_sync.empty()) { +            return; +        } +        std::vector<SyncValuesStruct> sync_values; +        sync_values.reserve(pending_sync.size()); +        for (size_t pending_id : pending_sync) { +            auto& query = slot_queries[pending_id]; +            if (True(query.flags & QueryFlagBits::IsRewritten) || +                True(query.flags & QueryFlagBits::IsInvalidated)) { +                continue; +            } +            query.flags |= QueryFlagBits::IsHostSynced; +            sync_values.emplace_back(query.guest_address, query.value, +                                     True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); +        } +        pending_sync.clear(); +        if (sync_values.size() > 0) { +            runtime.template SyncValues<SyncValuesStruct>(sync_values); +        } +    } + +private: +    RuntimeType& runtime; +    std::deque<size_t> pending_sync; +}; + +template <typename Traits> +class StubStreamer : public GuestStreamer<Traits> { +public: +    using RuntimeType = typename Traits::RuntimeType; + +    StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} + +    ~StubStreamer() override = default; + +    size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, +                        std::optional<u32> subreport = std::nullopt) override { +        size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); +        return new_id; +    } +}; + +template <typename Traits> +struct QueryCacheBase<Traits>::QueryCacheBaseImpl { +    using RuntimeType = typename Traits::RuntimeType; + +    QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, +                       Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) +        : owner{owner_}, rasterizer{rasterizer_}, +          cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { +    
    streamer_mask = 0; +        for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { +            streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); +            if (streamers[i]) { +                streamer_mask |= 1ULL << i; +            } +        } +    } + +    // Invokes `func` on every streamer whose bit is set in `mask`, lowest bit first. +    // A callback that returns bool stops the iteration early by returning true. +    template <typename Func> +    void ForEachStreamerIn(u64 mask, Func&& func) { +        static constexpr bool RETURNS_BOOL = +            std::is_same_v<std::invoke_result_t<Func, StreamerInterface*>, bool>; +        while (mask != 0) { +            size_t position = std::countr_zero(mask); +            mask &= ~(1ULL << position); +            if constexpr (RETURNS_BOOL) { +                if (func(streamers[position])) { +                    return; +                } +            } else { +                func(streamers[position]); +            } +        } +    } + +    template <typename Func> +    void ForEachStreamer(Func&& func) { +        ForEachStreamerIn(streamer_mask, func); +    } + +    QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) { +        size_t which_stream = location.stream_id.Value(); +        auto* streamer = streamers[which_stream]; +        if (!streamer) { +            return nullptr; +        } +        return streamer->GetQuery(location.query_id.Value()); +    } + +    QueryCacheBase<Traits>* owner; +    VideoCore::RasterizerInterface& rasterizer; +    Core::Memory::Memory& cpu_memory; +    Traits::RuntimeType& runtime; +    Tegra::GPU& gpu; +    std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; +    u64 streamer_mask; +    std::mutex flush_guard; +    std::deque<u64> flushes_pending; +    std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister; +}; + +template <typename Traits> +QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, +                                       VideoCore::RasterizerInterface& rasterizer_, +                                       
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) +    : cached_queries{} { +    impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( +        this, rasterizer_, cpu_memory_, runtime_, gpu_); +} + +template <typename Traits> +QueryCacheBase<Traits>::~QueryCacheBase() = default; + +template <typename Traits> +void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) { +    size_t index = static_cast<size_t>(counter_type); +    StreamerInterface* streamer = impl->streamers[index]; +    if (!streamer) [[unlikely]] { +        UNREACHABLE(); +        return; +    } +    if (is_enabled) { +        streamer->StartCounter(); +    } else { +        streamer->PauseCounter(); +    } +} + +template <typename Traits> +void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) { +    size_t index = static_cast<size_t>(counter_type); +    StreamerInterface* streamer = impl->streamers[index]; +    if (!streamer) [[unlikely]] { +        UNREACHABLE(); +        return; +    } +    streamer->CloseCounter(); +} + +template <typename Traits> +void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) { +    size_t index = static_cast<size_t>(counter_type); +    StreamerInterface* streamer = impl->streamers[index]; +    if (!streamer) [[unlikely]] { +        UNIMPLEMENTED(); +        return; +    } +    streamer->ResetCounter(); +} + +template <typename Traits> +void QueryCacheBase<Traits>::BindToChannel(s32 id) { +    VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id); +    impl->runtime.Bind3DEngine(maxwell3d); +} + +template <typename Traits> +void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type, +                                           QueryPropertiesFlags flags, u32 payload, u32 subreport) { +    const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); +    const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); +    size_t 
streamer_id = static_cast<size_t>(counter_type); +    auto* streamer = impl->streamers[streamer_id]; +    if (!streamer) [[unlikely]] { +        if (has_timestamp) { +            u64 timestamp = impl->gpu.GetTicks(); +            gpu_memory->Write<u64>(addr + 8, timestamp); +            gpu_memory->Write<u64>(addr, 1ULL); +        } else { +            gpu_memory->Write<u32>(addr, 1U); +        } +        return; +    } +    auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); +    if (!cpu_addr_opt) [[unlikely]] { +        return; +    } +    VAddr cpu_addr = *cpu_addr_opt; +    const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); +    auto* query = streamer->GetQuery(new_query_id); +    if (is_fence) { +        query->flags |= QueryFlagBits::IsFence; +    } +    QueryLocation query_location{}; +    query_location.stream_id.Assign(static_cast<u32>(streamer_id)); +    query_location.query_id.Assign(static_cast<u32>(new_query_id)); +    const auto gen_caching_indexing = [](VAddr cur_addr) { +        return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, +                                        static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); +    }; +    u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); +    u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); +    bool is_synced = !Settings::IsGPULevelHigh() && is_fence; +    std::function<void()> operation( +        [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { +            if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { +                if (!is_synced) [[likely]] { +                    impl->pending_unregister.push_back(query_location); +                } +                return; +            } +            if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { +                UNREACHABLE(); +                return; +            } +            if 
(True(query_base->flags & QueryFlagBits::HasTimestamp)) { +                u64 timestamp = impl->gpu.GetTicks(); +                // pointer_timestamp was obtained for cpu_addr + 8, right after the 64-bit value. +                std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); +                std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); +            } else { +                u32 value = static_cast<u32>(query_base->value); +                std::memcpy(pointer, &value, sizeof(value)); +            } +            if (!is_synced) [[likely]] { +                impl->pending_unregister.push_back(query_location); +            } +        }); +    if (is_fence) { +        impl->rasterizer.SignalFence(std::move(operation)); +    } else { +        impl->rasterizer.SyncOperation(std::move(operation)); +    } +    if (is_synced) { +        streamer->Free(new_query_id); +        return; +    } +    auto [cont_addr, base] = gen_caching_indexing(cpu_addr); +    { +        std::scoped_lock lock(cache_mutex); +        auto it1 = cached_queries.try_emplace(cont_addr); +        auto& sub_container = it1.first->second; +        auto it_current = sub_container.find(base); +        if (it_current == sub_container.end()) { +            sub_container.insert_or_assign(base, query_location); +            return; +        } +        auto* old_query = impl->ObtainQuery(it_current->second); +        old_query->flags |= QueryFlagBits::IsRewritten; +        sub_container.insert_or_assign(base, query_location); +    } +} + +template <typename Traits> +void QueryCacheBase<Traits>::UnregisterPending() { +    const auto gen_caching_indexing = [](VAddr cur_addr) { +        return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, +                                        static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); +    }; +    std::scoped_lock lock(cache_mutex); +    for (QueryLocation loc : impl->pending_unregister) { +        const auto [streamer_id, query_id] = loc.unpack(); +        auto* streamer = impl->streamers[streamer_id]; +  
      if (!streamer) [[unlikely]] { +            continue; +        } +        auto* query = streamer->GetQuery(query_id); +        auto [cont_addr, base] = gen_caching_indexing(query->guest_address); +        auto it1 = cached_queries.find(cont_addr); +        if (it1 != cached_queries.end()) { +            auto it2 = it1->second.find(base); +            if (it2 != it1->second.end()) { +                if (it2->second.raw == loc.raw) { +                    it1->second.erase(it2); +                } +            } +        } +        streamer->Free(query_id); +    } +    impl->pending_unregister.clear(); +} + +template <typename Traits> +void QueryCacheBase<Traits>::NotifyWFI() { +    bool should_sync = false; +    impl->ForEachStreamer( +        [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); +    if (!should_sync) { +        return; +    } + +    impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); +    impl->runtime.Barriers(true); +    impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); +    impl->runtime.Barriers(false); +} + +template <typename Traits> +void QueryCacheBase<Traits>::NotifySegment(bool resume) { +    if (resume) { +        impl->runtime.ResumeHostConditionalRendering(); +    } else { +        impl->runtime.PauseHostConditionalRendering(); +        CounterClose(VideoCommon::QueryType::ZPassPixelCount64); +        CounterClose(VideoCommon::QueryType::StreamingByteCount); +    } +} + +template <typename Traits> +bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { +    bool qc_dirty = false; +    const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { +        auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); +        if (!cpu_addr_opt) [[unlikely]] { +            return VideoCommon::LookupData{ +                .address = 0, +                .found_query = nullptr, +            }; + 
       } +        VAddr cpu_addr = *cpu_addr_opt; +        std::scoped_lock lock(cache_mutex); +        auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); +        if (it1 == cached_queries.end()) { +            return VideoCommon::LookupData{ +                .address = cpu_addr, +                .found_query = nullptr, +            }; +        } +        auto& sub_container = it1->second; +        auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); + +        if (it_current == sub_container.end()) { +            // Nothing cached at the exact offset: retry 4 bytes further and use that entry if found. +            it_current = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); +            if (it_current == sub_container.end()) { +                return VideoCommon::LookupData{ +                    .address = cpu_addr, +                    .found_query = nullptr, +                }; +            } +        } +        auto* query = impl->ObtainQuery(it_current->second); +        qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && +                    False(query->flags & QueryFlagBits::IsGuestSynced); +        return VideoCommon::LookupData{ +            .address = cpu_addr, +            .found_query = query, +        }; +    }; + +    auto& regs = maxwell3d->regs; +    if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { +        impl->runtime.EndHostConditionalRendering(); +        return false; +    } +    /*if (!Settings::IsGPULevelHigh()) { +        impl->runtime.EndHostConditionalRendering(); +        return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, +                                         VideoCommon::CacheType::BufferCache | +                                             VideoCommon::CacheType::QueryCache); +    }*/ +    const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); +    const GPUVAddr address = regs.render_enable.Address(); +    switch (mode) { +    case ComparisonMode::True: +     
   impl->runtime.EndHostConditionalRendering(); +        return false; +    case ComparisonMode::False: +        impl->runtime.EndHostConditionalRendering(); +        return false; +    case ComparisonMode::Conditional: { +        VideoCommon::LookupData object_1{gen_lookup(address)}; +        return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); +    } +    case ComparisonMode::IfEqual: { +        VideoCommon::LookupData object_1{gen_lookup(address)}; +        VideoCommon::LookupData object_2{gen_lookup(address + 16)}; +        return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, +                                                                   true); +    } +    case ComparisonMode::IfNotEqual: { +        VideoCommon::LookupData object_1{gen_lookup(address)}; +        VideoCommon::LookupData object_2{gen_lookup(address + 16)}; +        return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, +                                                                   false); +    } +    default: +        return false; +    } +} + +// Async downloads +template <typename Traits> +void QueryCacheBase<Traits>::CommitAsyncFlushes() { +    u64 mask{}; +    { +        std::scoped_lock lk(impl->flush_guard); +        impl->ForEachStreamer([&mask](StreamerInterface* streamer) { +            bool local_result = streamer->HasUnsyncedQueries(); +            if (local_result) { +                mask |= 1ULL << streamer->GetId(); +            } +        }); +        impl->flushes_pending.push_back(mask); +    } +    std::function<void()> func([this] { UnregisterPending(); }); +    impl->rasterizer.SyncOperation(std::move(func)); +    if (mask == 0) { +        return; +    } +    impl->ForEachStreamerIn(mask, +                            [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); +} + +template <typename Traits> +bool QueryCacheBase<Traits>::HasUncommittedFlushes() 
const { +    bool result = false; +    impl->ForEachStreamer([&result](StreamerInterface* streamer) { +        result |= streamer->HasUnsyncedQueries(); +        return result; +    }); +    return result; +} + +template <typename Traits> +bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() { +    std::scoped_lock lk(impl->flush_guard); +    return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; +} + +template <typename Traits> +void QueryCacheBase<Traits>::PopAsyncFlushes() { +    u64 mask; +    { +        std::scoped_lock lk(impl->flush_guard); +        mask = impl->flushes_pending.front(); +        impl->flushes_pending.pop_front(); +    } +    if (mask == 0) { +        return; +    } +    impl->ForEachStreamerIn(mask, +                            [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); +} + +// Invalidation + +template <typename Traits> +void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) { +    auto* query_base = impl->ObtainQuery(location); +    if (!query_base) { +        return; +    } +    query_base->flags |= QueryFlagBits::IsInvalidated; +} + +template <typename Traits> +bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { +    auto* query_base = impl->ObtainQuery(location); +    if (!query_base) { +        return false; +    } +    return True(query_base->flags & QueryFlagBits::IsHostManaged) && +           False(query_base->flags & QueryFlagBits::IsGuestSynced); +} + +template <typename Traits> +bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { +    auto* query_base = impl->ObtainQuery(location); +    if (!query_base) { +        return false; +    } +    if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && +        False(query_base->flags & QueryFlagBits::IsGuestSynced)) { +        auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); +       
 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { +            std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); +            return false; +        } +        u32 value_l = static_cast<u32>(query_base->value); +        std::memcpy(ptr, &value_l, sizeof(value_l)); +        return false; +    } +    return True(query_base->flags & QueryFlagBits::IsHostManaged) && +           False(query_base->flags & QueryFlagBits::IsGuestSynced); +} + +template <typename Traits> +void QueryCacheBase<Traits>::RequestGuestHostSync() { +    impl->rasterizer.ReleaseFences(); +} + +} // namespace VideoCommon diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h new file mode 100644 index 000000000..55f508dd1 --- /dev/null +++ b/src/video_core/query_cache/query_cache_base.h @@ -0,0 +1,181 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <functional> +#include <mutex> +#include <optional> +#include <span> +#include <unordered_map> +#include <utility> + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/control/channel_state_cache.h" +#include "video_core/query_cache/query_base.h" +#include "video_core/query_cache/types.h" + +namespace Core::Memory { +class Memory; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Tegra { +class GPU; +} + +namespace VideoCommon { + +struct LookupData { +    VAddr address; +    QueryBase* found_query; +}; + +template <typename Traits> +class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { +    using RuntimeType = typename Traits::RuntimeType; + +public: +    union QueryLocation { +        BitField<27, 5, u32> stream_id; +        BitField<0, 27, u32> query_id; +        u32 raw; + +        std::pair<size_t, size_t> unpack() { +            
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; +        } +    }; + +    explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, +                            Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); + +    ~QueryCacheBase(); + +    void InvalidateRegion(VAddr addr, std::size_t size) { +        IterateCache<true>(addr, size, +                           [this](QueryLocation location) { InvalidateQuery(location); }); +    } + +    void FlushRegion(VAddr addr, std::size_t size) { +        bool result = false; +        IterateCache<false>(addr, size, [this, &result](QueryLocation location) { +            result |= SemiFlushQueryDirty(location); +            return result; +        }); +        if (result) { +            RequestGuestHostSync(); +        } +    } + +    static u64 BuildMask(std::span<QueryType> types) { +        u64 mask = 0; +        for (auto query_type : types) { +            mask |= 1ULL << (static_cast<u64>(query_type)); +        } +        return mask; +    } + +    /// Return true when a CPU region is modified from the GPU +    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) { +        bool result = false; +        IterateCache<false>(addr, size, [this, &result](QueryLocation location) { +            result |= IsQueryDirty(location); +            return result; +        }); +        return result; +    } + +    void CounterEnable(QueryType counter_type, bool is_enabled); + +    void CounterReset(QueryType counter_type); + +    void CounterClose(QueryType counter_type); + +    void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags, +                       u32 payload, u32 subreport); + +    void NotifyWFI(); + +    bool AccelerateHostConditionalRendering(); + +    // Async downloads +    void CommitAsyncFlushes(); + +    bool HasUncommittedFlushes() const; + +    bool ShouldWaitAsyncFlushes(); + +    void 
PopAsyncFlushes(); + +    void NotifySegment(bool resume); + +    void BindToChannel(s32 id) override; + +protected: +    template <bool remove_from_cache, typename Func> +    void IterateCache(VAddr addr, std::size_t size, Func&& func) { +        static constexpr bool RETURNS_BOOL = +            std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>; +        const u64 addr_begin = addr; +        const u64 addr_end = addr_begin + size; + +        const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; +        std::scoped_lock lock(cache_mutex); +        for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { +            const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; +            const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { +                const u64 cache_begin = page_start + query_location; +                const u64 cache_end = cache_begin + sizeof(u32); +                return cache_begin < addr_end && addr_begin < cache_end; +            }; +            const auto& it = cached_queries.find(page); +            if (it == std::end(cached_queries)) { +                continue; +            } +            auto& contents = it->second; +            for (auto& query : contents) { +                if (!in_range(query.first)) { +                    continue; +                } +                if constexpr (RETURNS_BOOL) { +                    if (func(query.second)) { +                        return; +                    } +                } else { +                    func(query.second); +                } +            } +            if constexpr (remove_from_cache) { +                const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) { +                    return in_range(pair.first); +                }; +                std::erase_if(contents, in_range2); +            } +        } +    } + +    using ContentCache = typename std::unordered_map<u64, 
std::unordered_map<u32, QueryLocation>>; + +    void InvalidateQuery(QueryLocation location); +    bool IsQueryDirty(QueryLocation location); +    bool SemiFlushQueryDirty(QueryLocation location); +    void RequestGuestHostSync(); +    void UnregisterPending(); + +    std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries; +    std::mutex cache_mutex; + +    struct QueryCacheBaseImpl; +    friend struct QueryCacheBaseImpl; +    friend RuntimeType; + +    std::unique_ptr<struct QueryCacheBaseImpl> impl; +}; + +} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h new file mode 100644 index 000000000..dd5f95b3c --- /dev/null +++ b/src/video_core/query_cache/query_stream.h @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <deque> +#include <optional> +#include <vector> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/query_cache/bank_base.h" +#include "video_core/query_cache/query_base.h" + +namespace VideoCommon { + +class StreamerInterface { +public: +    StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} +    virtual ~StreamerInterface() = default; + +    virtual QueryBase* GetQuery(size_t id) = 0; + +    virtual void StartCounter() { +        /* Do Nothing */ +    } + +    virtual void PauseCounter() { +        /* Do Nothing */ +    } + +    virtual void ResetCounter() { +        /* Do Nothing */ +    } + +    virtual void CloseCounter() { +        /* Do Nothing */ +    } + +    virtual bool HasPendingSync() { +        return false; +    } + +    virtual void PresyncWrites() { +        /* Do Nothing */ +    } + +    virtual void SyncWrites() { +        /* Do Nothing */ +    } + +    virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, +                                std::optional<u32> subreport = std::nullopt) = 0; + +    virtual bool HasUnsyncedQueries() { +        return false; +    } + +    virtual void PushUnsyncedQueries() { +        /* Do Nothing */ +    } + +    virtual void PopUnsyncedQueries() { +        /* Do Nothing */ +    } + +    virtual void Free(size_t query_id) = 0; + +    size_t GetId() const { +        return id; +    } + +protected: +    const size_t id; +    const u64 dependance_mask; +}; + +template <typename QueryType> +class SimpleStreamer : public 
StreamerInterface { +public: +    SimpleStreamer(size_t id_) : StreamerInterface{id_} {} +    virtual ~SimpleStreamer() = default; + +protected: +    virtual QueryType* GetQuery(size_t query_id) override { +        if (query_id < slot_queries.size()) { +            return &slot_queries[query_id]; +        } +        return nullptr; +    } + +    virtual void Free(size_t query_id) override { +        std::scoped_lock lk(guard); +        ReleaseQuery(query_id); +    } + +    template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))> +    size_t BuildQuery(Args&&... args) { +        std::scoped_lock lk(guard); +        if (!old_queries.empty()) { +            size_t new_id = old_queries.front(); +            old_queries.pop_front(); +            new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...); +            return new_id; +        } +        size_t new_id = slot_queries.size(); +        slot_queries.emplace_back(std::forward<Args>(args)...); +        return new_id; +    } + +    void ReleaseQuery(size_t query_id) { + +        if (query_id < slot_queries.size()) { +            old_queries.push_back(query_id); +            return; +        } +        UNREACHABLE(); +    } + +    std::mutex guard; +    std::deque<QueryType> slot_queries; +    std::deque<size_t> old_queries; +}; + +} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/query_cache/types.h b/src/video_core/query_cache/types.h new file mode 100644 index 000000000..e9226bbfc --- /dev/null +++ b/src/video_core/query_cache/types.h @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace VideoCommon { + +enum class QueryPropertiesFlags : u32 { +    HasTimeout = 1 << 0, +    IsAFence = 1 << 1, +}; +DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags) + +// This should always be equivalent to maxwell3d Report Semaphore Reports +enum class QueryType : u32 { +    Payload = 0, // "None" in docs, but confirmed via hardware to return the payload +    VerticesGenerated = 1, +    ZPassPixelCount = 2, +    PrimitivesGenerated = 3, +    AlphaBetaClocks = 4, +    VertexShaderInvocations = 5, +    StreamingPrimitivesNeededMinusSucceeded = 6, +    GeometryShaderInvocations = 7, +    GeometryShaderPrimitivesGenerated = 9, +    ZCullStats0 = 10, +    StreamingPrimitivesSucceeded = 11, +    ZCullStats1 = 12, +    StreamingPrimitivesNeeded = 13, +    ZCullStats2 = 14, +    ClipperInvocations = 15, +    ZCullStats3 = 16, +    ClipperPrimitivesGenerated = 17, +    VtgPrimitivesOut = 18, +    PixelShaderInvocations = 19, +    ZPassPixelCount64 = 21, +    IEEECleanColorTarget = 24, +    IEEECleanZetaTarget = 25, +    StreamingByteCount = 26, +    TessellationInitInvocations = 27, +    BoundingRectangle = 28, +    TessellationShaderInvocations = 29, +    TotalStreamingPrimitivesNeededMinusSucceeded = 30, +    TessellationShaderPrimitivesGenerated = 31, +    // max. +    MaxQueryTypes, +}; + +// Comparison modes for Host Conditional Rendering +enum class ComparisonMode : u32 { +    False = 0, +    True = 1, +    Conditional = 2, +    IfEqual = 3, +    IfNotEqual = 4, +    MaxComparisonMode, +}; + +// Reduction ops. 
+enum class ReductionOp : u32 { +    RedAdd = 0, +    RedMin = 1, +    RedMax = 2, +    RedInc = 3, +    RedDec = 4, +    RedAnd = 5, +    RedOr = 6, +    RedXor = 7, +    MaxReductionOp, +}; + +} // namespace VideoCommon
\ No newline at end of file | 
