diff options
| -rw-r--r-- | src/tests/video_core/memory_tracker.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/word_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/rasterizer_accelerated.cpp | 99 | ||||
| -rw-r--r-- | src/video_core/rasterizer_accelerated.h | 29 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 10 | 
7 files changed, 73 insertions, 79 deletions
| diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp index 618793668..2dbff21af 100644 --- a/src/tests/video_core/memory_tracker.cpp +++ b/src/tests/video_core/memory_tracker.cpp @@ -23,13 +23,13 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE;  class RasterizerInterface {  public: -    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { +    void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {          const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};          const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>                             Core::Memory::YUZU_PAGEBITS};          for (u64 page = page_start; page < page_end; ++page) {              int& value = page_table[page]; -            value += delta; +            value += (cache ? 1 : -1);              if (value < 0) {                  throw std::logic_error{"negative page"};              } @@ -546,4 +546,4 @@ TEST_CASE("MemoryTracker: Cached write downloads") {      REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));      memory_track->MarkRegionAsCpuModified(c, WORD);      REQUIRE(rasterizer.Count() == 0); -}
\ No newline at end of file +} diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index a336bde41..95b752055 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -473,7 +473,7 @@ private:          VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;          IteratePages(changed_bits, [&](size_t offset, size_t size) {              rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, -                                               size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); +                                               size * BYTES_PER_PAGE, add_to_rasterizer);          });      } diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index f200a650f..3c9477f6e 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -3,6 +3,7 @@  #include <atomic> +#include "common/alignment.h"  #include "common/assert.h"  #include "common/common_types.h"  #include "common/div_ceil.h" @@ -11,61 +12,65 @@  namespace VideoCore { +static constexpr u16 IdentityValue = 1; +  using namespace Core::Memory; -RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) -    : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {} +RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : map{}, cpu_memory{cpu_memory_} { +    // We are tracking CPU memory, which cannot map more than 39 bits. +    const VAddr start_address = 0; +    const VAddr end_address = (1ULL << 39); +    const IntervalType address_space_interval(start_address, end_address); +    const auto value = std::make_pair(address_space_interval, IdentityValue); + +    map.add(value); +}  RasterizerAccelerated::~RasterizerAccelerated() = default; -void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { -    u64 uncache_begin = 0; -    u64 cache_begin = 0; -    u64 uncache_bytes = 0; -    u64 cache_bytes = 0; - -    std::atomic_thread_fence(std::memory_order_acquire); -    const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); -    for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { -        std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); - -        if (delta > 0) { -            ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); -        } else if (delta < 0) { -            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); -        } else { -            ASSERT_MSG(false, "Delta must be non-zero!"); -        } +void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) { +    std::scoped_lock lk{map_lock}; -        // Adds or subtracts 1, as count is a unsigned 8-bit value -        count.fetch_add(static_cast<u16>(delta), std::memory_order_release); - -        // Assume delta is either -1 or 1 -        if (count.load(std::memory_order::relaxed) == 0) { -            if (uncache_bytes == 0) { -                uncache_begin = page; -            } -            uncache_bytes += YUZU_PAGESIZE; -        } else if (uncache_bytes > 0) { -            cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, -                                                  false); -            uncache_bytes = 0; -        } -        if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { -            if (cache_bytes == 0) { -                cache_begin = page; -            } -            cache_bytes += YUZU_PAGESIZE; -        } else if (cache_bytes > 0) { -            cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); -            cache_bytes = 0; +    // Align sizes. +    addr = Common::AlignDown(addr, YUZU_PAGESIZE); +    size = Common::AlignUp(size, YUZU_PAGESIZE); + +    // Declare the overall interval we are going to operate on. +    const VAddr start_address = addr; +    const VAddr end_address = addr + size; +    const IntervalType modification_range(start_address, end_address); + +    // Find the boundaries of where to iterate. +    const auto lower = map.lower_bound(modification_range); +    const auto upper = map.upper_bound(modification_range); + +    // Iterate over the contained intervals. +    for (auto it = lower; it != upper; it++) { +        // Intersect interval range with modification range. +        const auto current_range = modification_range & it->first; + +        // Calculate the address and size to operate over. +        const auto current_addr = current_range.lower(); +        const auto current_size = current_range.upper() - current_addr; + +        // Get the current value of the range. +        const auto value = it->second; + +        if (cache && value == IdentityValue) { +            // If we are going to cache, and the value is not yet referenced, then cache this range. +            cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, true); +        } else if (!cache && value == IdentityValue + 1) { +            // If we are going to uncache, and this is the last reference, then uncache this range. +            cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, false);          }      } -    if (uncache_bytes > 0) { -        cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); -    } -    if (cache_bytes > 0) { -        cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); + +    // Update the set. +    const auto value = std::make_pair(modification_range, IdentityValue); +    if (cache) { +        map.add(value); +    } else { +        map.subtract(value);      }  } diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index e6c0ea87a..f1968f186 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -3,8 +3,8 @@  #pragma once -#include <array> -#include <atomic> +#include <mutex> +#include <boost/icl/interval_map.hpp>  #include "common/common_types.h"  #include "video_core/rasterizer_interface.h" @@ -21,28 +21,17 @@ public:      explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_);      ~RasterizerAccelerated() override; -    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; +    void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) override;  private: -    class CacheEntry final { -    public: -        CacheEntry() = default; +    using PageIndex = VAddr; +    using PageReferenceCount = u16; -        std::atomic_uint16_t& Count(std::size_t page) { -            return values[page & 3]; -        } +    using IntervalMap = boost::icl::interval_map<PageIndex, PageReferenceCount>; +    using IntervalType = IntervalMap::interval_type; -        const std::atomic_uint16_t& Count(std::size_t page) const { -            return values[page & 3]; -        } - -    private: -        std::array<std::atomic_uint16_t, 4> values{}; -    }; -    static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); - -    using CachedPages = std::array<CacheEntry, 0x2000000>; -    std::unique_ptr<CachedPages> cached_pages; +    IntervalMap map; +    std::mutex map_lock;      Core::Memory::Memory& cpu_memory;  }; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index af1469147..fd42d26b5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -162,7 +162,7 @@ public:      }      /// Increase/decrease the number of object in pages touching the specified region -    virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} +    virtual void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {}      /// Initialize disk cached resources for the game being emulated      virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index e81cd031b..a109f9cbe 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -132,7 +132,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t      storage.push_back(std::move(data)); -    rasterizer.UpdatePagesCachedCount(addr, size, 1); +    rasterizer.UpdatePagesCachedCount(addr, size, true);  }  void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { @@ -209,7 +209,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) {      const VAddr addr = entry->addr_start;      const size_t size = entry->addr_end - addr; -    rasterizer.UpdatePagesCachedCount(addr, size, -1); +    rasterizer.UpdatePagesCachedCount(addr, size, false);  }  void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d5a1709f..d7941f6a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -2080,7 +2080,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {      ASSERT(False(image.flags & ImageFlagBits::Tracked));      image.flags |= ImageFlagBits::Tracked;      if (False(image.flags & ImageFlagBits::Sparse)) { -        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); +        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, true);          return;      }      if (True(image.flags & ImageFlagBits::Registered)) { @@ -2091,13 +2091,13 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {              const auto& map = slot_map_views[map_view_id];              const VAddr cpu_addr = map.cpu_addr;              const std::size_t size = map.size; -            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); +            rasterizer.UpdatePagesCachedCount(cpu_addr, size, true);          }          return;      }      ForEachSparseSegment(image,                           [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { -                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); +                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, true);                           });  } @@ -2106,7 +2106,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {      ASSERT(True(image.flags & ImageFlagBits::Tracked));      image.flags &= ~ImageFlagBits::Tracked;      if (False(image.flags & ImageFlagBits::Sparse)) { -        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); +        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, false);          return;      }      ASSERT(True(image.flags & ImageFlagBits::Registered)); @@ -2117,7 +2117,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {          const auto& map = slot_map_views[map_view_id];          const VAddr cpu_addr = map.cpu_addr;          const std::size_t size = map.size; -        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); +        rasterizer.UpdatePagesCachedCount(cpu_addr, size, false);      }  } | 
