diff options
| -rw-r--r-- | src/core/core.cpp | 32 | ||||
| -rw-r--r-- | src/core/core.h | 11 | ||||
| -rw-r--r-- | src/core/gpu_dirty_memory_manager.h | 122 | ||||
| -rw-r--r-- | src/core/memory.cpp | 40 | ||||
| -rw-r--r-- | src/core/memory.h | 6 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 39 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache_base.h | 5 | ||||
| -rw-r--r-- | src/video_core/fence_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_null/null_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_null/null_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader_cache.h | 2 | 
20 files changed, 329 insertions, 41 deletions
| diff --git a/src/core/core.cpp b/src/core/core.cpp index b74fd0a58..9e3eb3795 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -27,6 +27,7 @@  #include "core/file_sys/savedata_factory.h"  #include "core/file_sys/vfs_concat.h"  #include "core/file_sys/vfs_real.h" +#include "core/gpu_dirty_memory_manager.h"  #include "core/hid/hid_core.h"  #include "core/hle/kernel/k_memory_manager.h"  #include "core/hle/kernel/k_process.h" @@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,  struct System::Impl {      explicit Impl(System& system)          : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, -          cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} +          cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, +          gpu_dirty_memory_write_manager{} { +        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); +    }      void Initialize(System& system) {          device_memory = std::make_unique<Core::DeviceMemory>(); @@ -234,6 +238,8 @@ struct System::Impl {          // Setting changes may require a full system reinitialization (e.g., disabling multicore).          
ReinitializeIfNecessary(system); +        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); +          kernel.Initialize();          cpu_manager.Initialize(); @@ -540,6 +546,9 @@ struct System::Impl {      std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};      std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; + +    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> +        gpu_dirty_memory_write_manager{};  };  System::System() : impl{std::make_unique<Impl>(*this)} {} @@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) {      impl->kernel.PrepareReschedule(core_index);  } +Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() { +    const std::size_t core = impl->kernel.GetCurrentHostThreadID(); +    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES +                                                    ? core +                                                    : Core::Hardware::NUM_CPU_CORES - 1]; +} + +/// Provides a constant reference to the current GPU dirty memory manager. +const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const { +    const std::size_t core = impl->kernel.GetCurrentHostThreadID(); +    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES +                                                    ? 
core +                                                    : Core::Hardware::NUM_CPU_CORES - 1]; +} +  size_t System::GetCurrentHostThreadID() const {      return impl->kernel.GetCurrentHostThreadID();  } +void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { +    for (auto& manager : impl->gpu_dirty_memory_write_manager) { +        manager.Gather(callback); +    } +} +  PerfStatsResults System::GetAndResetPerfStats() {      return impl->GetAndResetPerfStats();  } diff --git a/src/core/core.h b/src/core/core.h index 93afc9303..14b2f7785 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -108,9 +108,10 @@ class CpuManager;  class Debugger;  class DeviceMemory;  class ExclusiveMonitor; -class SpeedLimiter; +class GPUDirtyMemoryManager;  class PerfStats;  class Reporter; +class SpeedLimiter;  class TelemetrySession;  struct PerfStatsResults; @@ -225,6 +226,14 @@ public:      /// Prepare the core emulation for a reschedule      void PrepareReschedule(u32 core_index); +    /// Provides a reference to the GPU dirty memory manager. +    [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager(); + +    /// Provides a constant reference to the current GPU dirty memory manager. 
+    [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const; + +    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); +      [[nodiscard]] size_t GetCurrentHostThreadID() const;      /// Gets and resets core performance statistics diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h new file mode 100644 index 000000000..9687531e8 --- /dev/null +++ b/src/core/gpu_dirty_memory_manager.h @@ -0,0 +1,122 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <atomic> +#include <bit> +#include <functional> +#include <mutex> +#include <utility> +#include <vector> + +#include "core/memory.h" + +namespace Core { + +class GPUDirtyMemoryManager { +public: +    GPUDirtyMemoryManager() : current{default_transform} { +        back_buffer.reserve(256); +        front_buffer.reserve(256); +    } + +    ~GPUDirtyMemoryManager() = default; + +    void Collect(VAddr address, size_t size) { +        TransformAddress t = BuildTransform(address, size); +        TransformAddress tmp, original; +        do { +            tmp = current.load(std::memory_order_acquire); +            original = tmp; +            if (tmp.address != t.address) { +                if (IsValid(tmp.address)) { +                    std::scoped_lock lk(guard); +                    back_buffer.emplace_back(tmp); +                    current.exchange(t, std::memory_order_relaxed); +                    return; +                } +                tmp.address = t.address; +                tmp.mask = 0; +            } +            if ((tmp.mask | t.mask) == tmp.mask) { +                return; +            } +            tmp.mask |= t.mask; +        } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release, +                                                std::memory_order_relaxed)); +    } + +    void 
Gather(std::function<void(VAddr, size_t)>& callback) { +        { +            std::scoped_lock lk(guard); +            TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); +            front_buffer.swap(back_buffer); +            if (IsValid(t.address)) { +                front_buffer.emplace_back(t); +            } +        } +        for (auto& transform : front_buffer) { +            size_t offset = 0; +            u64 mask = transform.mask; +            while (mask != 0) { +                const size_t empty_bits = std::countr_zero(mask); +                offset += empty_bits << align_bits; +                mask = mask >> empty_bits; + +                const size_t continuous_bits = std::countr_one(mask); +                callback((static_cast<VAddr>(transform.address) << page_bits) + offset, +                         continuous_bits << align_bits); +                mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; +                offset += continuous_bits << align_bits; +            } +        } +        front_buffer.clear(); +    } + +private: +    struct alignas(8) TransformAddress { +        u32 address; +        u32 mask; +    }; + +    constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; +    constexpr static size_t page_size = 1ULL << page_bits; +    constexpr static size_t page_mask = page_size - 1; + +    constexpr static size_t align_bits = 6U; +    constexpr static size_t align_size = 1U << align_bits; +    constexpr static size_t align_mask = align_size - 1; +    constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; + +    bool IsValid(VAddr address) { +        return address < (1ULL << 39); +    } + +    template <typename T> +    T CreateMask(size_t top_bit, size_t minor_bit) { +        T mask = ~T(0); +        mask <<= (sizeof(T) * 8 - top_bit); +        mask >>= (sizeof(T) * 8 - top_bit); +        mask >>= minor_bit; +        mask <<= minor_bit; +        
return mask; +    } + +    TransformAddress BuildTransform(VAddr address, size_t size) { +        const size_t minor_address = address & page_mask; +        const size_t minor_bit = minor_address >> align_bits; +        const size_t top_bit = (minor_address + size + align_mask) >> align_bits; +        TransformAddress result{}; +        result.address = static_cast<u32>(address >> page_bits); +        result.mask = CreateMask<u32>(top_bit, minor_bit); +        return result; +    } + +    std::atomic<TransformAddress> current{}; +    std::mutex guard; +    std::vector<TransformAddress> back_buffer; +    std::vector<TransformAddress> front_buffer; +}; + +} // namespace Core diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 514ba0d66..257406f09 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -3,6 +3,7 @@  #include <algorithm>  #include <cstring> +#include <span>  #include "common/assert.h"  #include "common/atomic_ops.h" @@ -13,6 +14,7 @@  #include "common/swap.h"  #include "core/core.h"  #include "core/device_memory.h" +#include "core/gpu_dirty_memory_manager.h"  #include "core/hardware_properties.h"  #include "core/hle/kernel/k_page_table.h"  #include "core/hle/kernel/k_process.h" @@ -678,7 +680,7 @@ struct Memory::Impl {                  LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,                            GetInteger(vaddr), static_cast<u64>(data));              }, -            [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); +            [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });          if (ptr) {              std::memcpy(ptr, &data, sizeof(T));          } @@ -692,7 +694,7 @@ struct Memory::Impl {                  LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",                            sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));              }, -            [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), 
sizeof(T)); }); +            [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });          if (ptr) {              const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);              return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -707,7 +709,7 @@ struct Memory::Impl {                  LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",                            GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));              }, -            [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); }); +            [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });          if (ptr) {              const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);              return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -717,7 +719,7 @@ struct Memory::Impl {      void HandleRasterizerDownload(VAddr address, size_t size) {          const size_t core = system.GetCurrentHostThreadID(); -        auto& current_area = rasterizer_areas[core]; +        auto& current_area = rasterizer_read_areas[core];          const VAddr end_address = address + size;          if (current_area.start_address <= address && end_address <= current_area.end_address)              [[likely]] { @@ -726,9 +728,31 @@ struct Memory::Impl {          current_area = system.GPU().OnCPURead(address, size);      } -    Common::PageTable* current_page_table = nullptr; -    std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; +    void HandleRasterizerWrite(VAddr address, size_t size) { +        const size_t core = system.GetCurrentHostThreadID(); +        auto& current_area = rasterizer_write_areas[core]; +        VAddr subaddress = address >> YUZU_PAGEBITS; +        bool do_collection = current_area.last_address == subaddress; +        if (!do_collection) [[unlikely]] { +            do_collection = 
system.GPU().OnCPUWrite(address, size); +            if (!do_collection) { +                return; +            } +            current_area.last_address = subaddress; +        } +        gpu_dirty_managers[core].Collect(address, size); +    } + +    struct GPUDirtyState { +        VAddr last_address; +    }; +      Core::System& system; +    Common::PageTable* current_page_table = nullptr; +    std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> +        rasterizer_read_areas{}; +    std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; +    std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;  };  Memory::Memory(Core::System& system_) : system{system_} { @@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)      impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);  } +void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { +    impl->gpu_dirty_managers = managers; +} +  Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {      return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);  } diff --git a/src/core/memory.h b/src/core/memory.h index 72a0be813..ea01824f8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -5,6 +5,7 @@  #include <cstddef>  #include <memory> +#include <span>  #include <string>  #include "common/typed_address.h"  #include "core/hle/result.h" @@ -15,7 +16,8 @@ struct PageTable;  namespace Core {  class System; -} +class GPUDirtyMemoryManager; +} // namespace Core  namespace Kernel {  class PhysicalMemory; @@ -458,6 +460,8 @@ public:       */      void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); +    void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); +  private:      Core::System& system; diff --git a/src/video_core/buffer_cache/buffer_cache.h 
b/src/video_core/buffer_cache/buffer_cache.h index 58a45ab67..b5ed3380f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {  template <class P>  void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { -    memory_tracker.CachedCpuWrite(cpu_addr, size); +    const bool is_dirty = IsRegionRegistered(cpu_addr, size); +    if (!is_dirty) { +        return; +    } +    VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); +    VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); +    if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { +        WriteMemory(cpu_addr, size); +        return; +    } + +    tmp_buffer.resize_destructive(size); +    cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); + +    InlineMemoryImplementation(cpu_addr, size, tmp_buffer); +} + +template <class P> +bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { +    const bool is_dirty = IsRegionRegistered(cpu_addr, size); +    if (!is_dirty) { +        return false; +    } +    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { +        return true; +    } +    WriteMemory(cpu_addr, size); +    return false;  }  template <class P> @@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,          return false;      } +    InlineMemoryImplementation(dest_address, copy_size, inlined_buffer); + +    return true; +} + +template <class P> +void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, +                                                std::span<const u8> inlined_buffer) {      const IntervalType subtract_interval{dest_address, dest_address + copy_size};      ClearDownload(subtract_interval);      common_ranges.subtract(subtract_interval); @@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, 
size_t copy_size,      } else {          buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));      } - -    return true;  }  template <class P> diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index fe6068cfe..460fc7551 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -245,6 +245,8 @@ public:      void CachedWriteMemory(VAddr cpu_addr, u64 size); +    bool OnCPUWrite(VAddr cpu_addr, u64 size); +      void DownloadMemory(VAddr cpu_addr, u64 size);      std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); @@ -543,6 +545,9 @@ private:      void ClearDownload(IntervalType subtract_interval); +    void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, +                                    std::span<const u8> inlined_buffer); +      VideoCore::RasterizerInterface& rasterizer;      Core::Memory::Memory& cpu_memory; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 35d699bbf..ab20ff30f 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -69,7 +69,6 @@ public:      }      void SignalFence(std::function<void()>&& func) { -        rasterizer.InvalidateGPUCache();          bool delay_fence = Settings::IsGPULevelHigh();          if constexpr (!can_async_check) {              TryReleasePendingFences<false>(); @@ -96,6 +95,7 @@ public:              guard.unlock();              cv.notify_all();          } +        rasterizer.InvalidateGPUCache();      }      void SignalSyncPoint(u32 value) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index db385076d..c192e33b2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -95,7 +95,9 @@ struct GPU::Impl {      /// Synchronizes CPU writes with Host GPU memory.      
void InvalidateGPUCache() { -        rasterizer->InvalidateGPUCache(); +        std::function<void(VAddr, size_t)> callback_writes( +            [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); +        system.GatherGPUDirtyMemory(callback_writes);      }      /// Signal the ending of command list. @@ -299,6 +301,10 @@ struct GPU::Impl {          gpu_thread.InvalidateRegion(addr, size);      } +    bool OnCPUWrite(VAddr addr, u64 size) { +        return rasterizer->OnCPUWrite(addr, size); +    } +      /// Notify rasterizer that any caches of the specified region should be flushed and invalidated      void FlushAndInvalidateRegion(VAddr addr, u64 size) {          gpu_thread.FlushAndInvalidateRegion(addr, size); @@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {      impl->InvalidateRegion(addr, size);  } +bool GPU::OnCPUWrite(VAddr addr, u64 size) { +    return impl->OnCPUWrite(addr, size); +} +  void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {      impl->FlushAndInvalidateRegion(addr, size);  } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e49c40cf2..ba2838b89 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -250,6 +250,10 @@ public:      /// Notify rasterizer that any caches of the specified region should be invalidated      void InvalidateRegion(VAddr addr, u64 size); +    /// Notify rasterizer that CPU is trying to write this area. 
It returns true if the area is +    /// sensitive, false otherwise +    bool OnCPUWrite(VAddr addr, u64 size); +      /// Notify rasterizer that any caches of the specified region should be flushed and invalidated      void FlushAndInvalidateRegion(VAddr addr, u64 size); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 889144f38..2f0f9f593 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,          } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {              rasterizer->FlushRegion(flush->addr, flush->size);          } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { -            rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); +            rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);          } else {              ASSERT(false);          } @@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {  }  void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { -    rasterizer->OnCPUWrite(addr, size); +    rasterizer->OnCacheInvalidation(addr, size);  }  void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {      // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important -    rasterizer->OnCPUWrite(addr, size); +    rasterizer->OnCacheInvalidation(addr, size);  }  u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 7566a8c4e..cb8029a4f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -109,7 +109,9 @@ public:      }      /// Notify rasterizer that any caches of the specified region are desync with guest -    virtual void OnCPUWrite(VAddr addr, u64 size) = 0; +    virtual void OnCacheInvalidation(VAddr addr, 
u64 size) = 0; + +    virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;      /// Sync memory between guest and host.      virtual void InvalidateGPUCache() = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index bf2ce4c49..92ecf6682 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp      return false;  }  void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} -void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} +bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { +    return false; +} +void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}  VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {      VideoCore::RasterizerDownloadArea new_area{          .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index a8d35d2c1..93b9a6971 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -53,7 +53,8 @@ public:                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override;      void InvalidateRegion(VAddr addr, u64 size,                            VideoCommon::CacheType which = VideoCommon::CacheType::All) override; -    void OnCPUWrite(VAddr addr, u64 size) override; +    void OnCacheInvalidation(VAddr addr, u64 size) override; +    bool OnCPUWrite(VAddr addr, u64 size) override;      VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;      void InvalidateGPUCache() override;      void UnmapMemory(VAddr addr, u64 size) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp index edf527f2d..aadd6967c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache      }  } -void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { +bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { +    MICROPROFILE_SCOPE(OpenGL_CacheManagement); +    if (addr == 0 || size == 0) { +        return false; +    } + +    { +        std::scoped_lock lock{buffer_cache.mutex}; +        if (buffer_cache.OnCPUWrite(addr, size)) { +            return true; +        } +    } + +    { +        std::scoped_lock lock{texture_cache.mutex}; +        texture_cache.WriteMemory(addr, size); +    } + +    shader_cache.InvalidateRegion(addr, size); +    return false; +} + +void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {      MICROPROFILE_SCOPE(OpenGL_CacheManagement);      if (addr == 0 || size == 0) {          return;      } -    shader_cache.OnCPUWrite(addr, size);      {          std::scoped_lock lock{texture_cache.mutex};          texture_cache.WriteMemory(addr, size); @@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {          std::scoped_lock lock{buffer_cache.mutex};          buffer_cache.CachedWriteMemory(addr, size);      } +    shader_cache.InvalidateRegion(addr, size);  }  void RasterizerOpenGL::InvalidateGPUCache() { -    MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    shader_cache.SyncGuestHost(); -    { -        std::scoped_lock lock{buffer_cache.mutex}; -        buffer_cache.FlushCachedWrites(); -    } +    gpu.InvalidateGPUCache();  }  void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { @@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {          std::scoped_lock lock{buffer_cache.mutex};          buffer_cache.WriteMemory(addr, size);      } -    
shader_cache.OnCPUWrite(addr, size); +    shader_cache.OnCacheInvalidation(addr, size);  }  void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a73ad15c1..8eda2ddba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -98,7 +98,8 @@ public:      VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;      void InvalidateRegion(VAddr addr, u64 size,                            VideoCommon::CacheType which = VideoCommon::CacheType::All) override; -    void OnCPUWrite(VAddr addr, u64 size) override; +    void OnCacheInvalidation(VAddr addr, u64 size) override; +    bool OnCPUWrite(VAddr addr, u64 size) override;      void InvalidateGPUCache() override;      void UnmapMemory(VAddr addr, u64 size) override;      void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f7c0d939a..456bb040e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s      }  } -void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { +bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { +    if (addr == 0 || size == 0) { +        return false; +    } + +    { +        std::scoped_lock lock{buffer_cache.mutex}; +        if (buffer_cache.OnCPUWrite(addr, size)) { +            return true; +        } +    } + +    { +        std::scoped_lock lock{texture_cache.mutex}; +        texture_cache.WriteMemory(addr, size); +    } + +    pipeline_cache.InvalidateRegion(addr, size); +    return false; +} + +void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {      if (addr 
== 0 || size == 0) {          return;      } -    pipeline_cache.OnCPUWrite(addr, size); +      {          std::scoped_lock lock{texture_cache.mutex};          texture_cache.WriteMemory(addr, size); @@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {          std::scoped_lock lock{buffer_cache.mutex};          buffer_cache.CachedWriteMemory(addr, size);      } +    pipeline_cache.InvalidateRegion(addr, size);  }  void RasterizerVulkan::InvalidateGPUCache() { -    pipeline_cache.SyncGuestHost(); -    { -        std::scoped_lock lock{buffer_cache.mutex}; -        buffer_cache.FlushCachedWrites(); -    } +    gpu.InvalidateGPUCache();  }  void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { @@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {          std::scoped_lock lock{buffer_cache.mutex};          buffer_cache.WriteMemory(addr, size);      } -    pipeline_cache.OnCPUWrite(addr, size); +    pipeline_cache.OnCacheInvalidation(addr, size);  }  void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b39710b3c..73257d964 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -96,7 +96,8 @@ public:      void InvalidateRegion(VAddr addr, u64 size,                            VideoCommon::CacheType which = VideoCommon::CacheType::All) override;      void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; -    void OnCPUWrite(VAddr addr, u64 size) override; +    void OnCacheInvalidation(VAddr addr, u64 size) override; +    bool OnCPUWrite(VAddr addr, u64 size) override;      void InvalidateGPUCache() override;      void UnmapMemory(VAddr addr, u64 size) override;      void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; diff --git a/src/video_core/shader_cache.cpp 
b/src/video_core/shader_cache.cpp index 4db948b6d..01701201d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {      RemovePendingShaders();  } -void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { +void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {      std::scoped_lock lock{invalidation_mutex};      InvalidatePagesInRegion(addr, size);  } diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index f3cc4c70b..de8e08002 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -62,7 +62,7 @@ public:      /// @brief Unmarks a memory region as cached and marks it for removal      /// @param addr Start address of the CPU write operation      /// @param size Number of bytes of the CPU write operation -    void OnCPUWrite(VAddr addr, size_t size); +    void OnCacheInvalidation(VAddr addr, size_t size);      /// @brief Flushes delayed removal operations      void SyncGuestHost(); | 
