 src/common/CMakeLists.txt                         |  1
 src/common/range_mutex.h                          | 93
 src/core/device_memory_manager.h                  | 18
 src/core/device_memory_manager.inc                | 63
 src/core/hle/service/nvdrv/core/container.cpp     |  4
 src/video_core/texture_cache/texture_cache.h      | 87
 src/video_core/texture_cache/texture_cache_base.h |  4
 7 files changed, 190 insertions(+), 80 deletions(-)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e30fea268..85926fc8f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,7 @@ add_library(common STATIC
     precompiled_headers.h
     quaternion.h
     range_map.h
+    range_mutex.h
     reader_writer_queue.h
     ring_buffer.h
     ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
diff --git a/src/common/range_mutex.h b/src/common/range_mutex.h
new file mode 100644
index 000000000..d6c949811
--- /dev/null
+++ b/src/common/range_mutex.h
@@ -0,0 +1,93 @@
+// SPDX-FileCopyrightText: 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+#include "common/intrusive_list.h"
+
+namespace Common {
+
+class ScopedRangeLock;
+
+class RangeMutex {
+public:
+    explicit RangeMutex() = default;
+    ~RangeMutex() = default;
+
+private:
+    friend class ScopedRangeLock;
+
+    void Lock(ScopedRangeLock& l);
+    void Unlock(ScopedRangeLock& l);
+    bool HasIntersectionLocked(ScopedRangeLock& l);
+
+private:
+    std::mutex m_mutex;
+    std::condition_variable m_cv;
+
+    using LockList = Common::IntrusiveListBaseTraits<ScopedRangeLock>::ListType;
+    LockList m_list;
+};
+
+class ScopedRangeLock : public Common::IntrusiveListBaseNode<ScopedRangeLock> {
+public:
+    explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size)
+        : m_mutex(mutex), m_address(address), m_size(size) {
+        if (m_size > 0) {
+            m_mutex.Lock(*this);
+        }
+    }
+    ~ScopedRangeLock() {
+        if (m_size > 0) {
+            m_mutex.Unlock(*this);
+        }
+    }
+
+    u64 GetAddress() const {
+        return m_address;
+    }
+
+    u64 GetSize() const {
+        return m_size;
+    }
+
+private:
+    RangeMutex& m_mutex;
+    const u64 m_address{};
+    const u64 m_size{};
+};
+
+inline void RangeMutex::Lock(ScopedRangeLock& l) {
+    std::unique_lock lk{m_mutex};
+    m_cv.wait(lk, [&] { return !HasIntersectionLocked(l); });
+    m_list.push_back(l);
+}
+
+inline void RangeMutex::Unlock(ScopedRangeLock& l) {
+    {
+        std::scoped_lock lk{m_mutex};
+        m_list.erase(m_list.iterator_to(l));
+    }
+    m_cv.notify_all();
+}
+
+inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) {
+    const auto cur_begin = l.GetAddress();
+    const auto cur_last = l.GetAddress() + l.GetSize() - 1;
+
+    for (const auto& other : m_list) {
+        const auto other_begin = other.GetAddress();
+        const auto other_last = other.GetAddress() + other.GetSize() - 1;
+
+        if (cur_begin <= other_last && other_begin <= cur_last) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+} // namespace Common
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index ffeed46cc..0568a821b 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -5,11 +5,13 @@
 
 #include <array>
 #include <atomic>
+#include <bit>
 #include <deque>
 #include <memory>
 #include <mutex>
 
 #include "common/common_types.h"
+#include "common/range_mutex.h"
 #include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
 
@@ -180,31 +182,35 @@ private:
     }
 
     Common::VirtualBuffer<VAddr> cpu_backing_address;
-    static constexpr size_t subentries = 8 / sizeof(u8);
+    using CounterType = u8;
+    using CounterAtomicType = std::atomic_uint8_t;
+    static constexpr size_t subentries = 8 / sizeof(CounterType);
     static constexpr size_t subentries_mask = subentries - 1;
+    static constexpr size_t subentries_shift =
+        std::countr_zero(sizeof(u64)) - std::countr_zero(sizeof(CounterType));
     class CounterEntry final {
     public:
         CounterEntry() = default;
 
-        std::atomic_uint8_t& Count(std::size_t page) {
+        CounterAtomicType& Count(std::size_t page) {
             return values[page & subentries_mask];
         }
 
-        const std::atomic_uint8_t& Count(std::size_t page) const {
+        const CounterAtomicType& Count(std::size_t page) const {
             return values[page & subentries_mask];
         }
 
     private:
-        std::array<std::atomic_uint8_t, subentries> values{};
+        std::array<CounterAtomicType, subentries> values{};
     };
-    static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
+    static_assert(sizeof(CounterEntry) == subentries * sizeof(CounterType),
                   "CounterEntry should be 8 bytes!");
     static constexpr size_t num_counter_entries =
         (1ULL << (device_virtual_bits - page_bits)) / subentries;
     using CachedPages = std::array<CounterEntry, num_counter_entries>;
     std::unique_ptr<CachedPages> cached_pages;
 
-    std::mutex counter_guard;
+    Common::RangeMutex counter_guard;
     std::mutex mapping_guard;
 };
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index eab8a2731..b026f4220 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -213,8 +213,8 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
 }
 
 template <typename Traits>
-void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
-                                      Asid asid, bool track) {
+void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, Asid asid,
+                                      bool track) {
     Core::Memory::Memory* process_memory = registered_processes[asid.id];
     size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
@@ -508,12 +508,7 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
 
 template <typename Traits>
 void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
-    std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
-    const auto Lock = [&] {
-        if (!lk) {
-            lk.lock();
-        }
-    };
+    Common::ScopedRangeLock lk(counter_guard, addr, size);
     u64 uncache_begin = 0;
     u64 cache_begin = 0;
     u64 uncache_bytes = 0;
@@ -524,22 +519,36 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
     const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
     size_t page = addr >> Memory::YUZU_PAGEBITS;
     auto [asid, base_vaddress] = ExtractCPUBacking(page);
-    size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
     auto* memory_device_inter = registered_processes[asid.id];
+    const auto release_pending = [&] {
+        if (uncache_bytes > 0) {
+            MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+                              uncache_bytes, false);
+            uncache_bytes = 0;
+        }
+        if (cache_bytes > 0) {
+            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+                              cache_bytes, true);
+            cache_bytes = 0;
+        }
+    };
     for (; page != page_end; ++page) {
-        std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
+        CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page);
+        auto [asid_2, vpage] = ExtractCPUBacking(page);
+        vpage >>= Memory::YUZU_PAGEBITS;
 
-        if (delta > 0) {
-            ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
-                       "Count may overflow!");
-        } else if (delta < 0) {
-            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
-        } else {
-            ASSERT_MSG(false, "Delta must be non-zero!");
+        if (vpage == 0) [[unlikely]] {
+            release_pending();
+            continue;
+        }
+
+        if (asid.id != asid_2.id) [[unlikely]] {
+            release_pending();
+            memory_device_inter = registered_processes[asid_2.id];
         }
 
         // Adds or subtracts 1, as count is a unsigned 8-bit value
-        count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
+        count.fetch_add(static_cast<CounterType>(delta), std::memory_order_release);
 
         // Assume delta is either -1 or 1
         if (count.load(std::memory_order::relaxed) == 0) {
@@ -548,7 +557,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
             }
             uncache_bytes += Memory::YUZU_PAGESIZE;
         } else if (uncache_bytes > 0) {
-            Lock();
             MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
                               uncache_bytes, false);
             uncache_bytes = 0;
@@ -559,23 +567,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
             }
             cache_bytes += Memory::YUZU_PAGESIZE;
         } else if (cache_bytes > 0) {
-            Lock();
-            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
-                              true);
+            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+                              cache_bytes, true);
             cache_bytes = 0;
         }
-        vpage++;
-    }
-    if (uncache_bytes > 0) {
-        Lock();
-        MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
-                          false);
-    }
-    if (cache_bytes > 0) {
-        Lock();
-        MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
-                          true);
     }
+    release_pending();
 }
 
 } // namespace Core
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index dc1b4d5be..e89cca6f2 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
             // Check if this memory block is heap.
             if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
-                if (svc_mem_info.size > region_size) {
+                if (region_start + region_size == svc_mem_info.base_address) {
+                    region_size += svc_mem_info.size;
+                } else if (svc_mem_info.size > region_size) {
                     region_size = svc_mem_info.size;
                     region_start = svc_mem_info.base_address;
                 }
             }
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7398ed2ec..a7400adfa 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1431,7 +1431,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
             }
         }
     };
-    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+    ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes,
+                               region_check_gpu);
 
     bool can_rescale = info.rescaleable;
     bool any_rescaled = false;
@@ -1842,7 +1843,7 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
     if (!storage_id) {
         return;
     }
-    auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+    auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2];
     ForEachGPUPage(gpu_addr, size,
                    [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
                        const auto it = gpu_page_table.find(page);
@@ -1882,41 +1883,48 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
 
 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size,
+                                                 Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = sparse_page_table.find(page);
-        if (it == sparse_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
-        for (const ImageId image_id : it->second) {
-            Image& image = slot_images[image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
+    auto storage_id = getStorageID(as_id);
+    if (!storage_id) {
+        return;
+    }
+    auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1];
+    ForEachGPUPage(gpu_addr, size,
+                   [this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) {
+                       const auto it = sparse_page_table.find(page);
+                       if (it == sparse_page_table.end()) {
+                           if constexpr (BOOL_BREAK) {
+                               return false;
+                           } else {
+                               return;
+                           }
+                       }
+                       for (const ImageId image_id : it->second) {
+                           Image& image = slot_images[image_id];
+                           if (True(image.flags & ImageFlagBits::Picked)) {
+                               continue;
+                           }
+                           if (!image.OverlapsGPU(gpu_addr, size)) {
+                               continue;
+                           }
+                           image.flags |= ImageFlagBits::Picked;
+                           images.push_back(image_id);
+                           if constexpr (BOOL_BREAK) {
+                               if (func(image_id, image)) {
+                                   return true;
+                               }
+                           } else {
+                               func(image_id, image);
+                           }
+                       }
+                       if constexpr (BOOL_BREAK) {
+                           return false;
+                       }
+                   });
     for (const ImageId image_id : images) {
         slot_images[image_id].flags &= ~ImageFlagBits::Picked;
     }
@@ -1988,8 +1996,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
             sparse_maps.push_back(map_id);
         });
     sparse_views.emplace(image_id, std::move(sparse_maps));
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        (*channel_state->sparse_page_table)[page].push_back(image_id);
+    });
 }
 
 template <class P>
@@ -2042,7 +2051,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         return;
     }
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
-        clear_page_table(page, sparse_page_table);
+        clear_page_table(page, (*channel_state->sparse_page_table));
     });
     auto it = sparse_views.find(image_id);
     ASSERT(it != sparse_views.end());
@@ -2496,13 +2505,15 @@ void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel
     const auto it = channel_map.find(channel.bind_id);
     auto* this_state = &channel_storage[it->second];
     const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
-    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
+    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2];
+    this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1];
 }
 
 /// Bind a channel for execution.
 template <class P>
 void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
     gpu_page_table_storage.emplace_back();
+    gpu_page_table_storage.emplace_back();
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 8699d40d4..f9aebb293 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -86,6 +86,7 @@ public:
     std::unordered_map<TSCEntry, SamplerId> samplers;
 
     TextureCacheGPUMap* gpu_page_table;
+    TextureCacheGPUMap* sparse_page_table;
 };
 
 template <class P>
@@ -357,7 +358,7 @@ private:
     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     template <typename Func>
-    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+    void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     /// Iterates over all the images in a region calling func
     template <typename Func>
@@ -431,7 +432,6 @@ private:
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
     std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
    std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
     DAddr virtual_invalid_space{};
 
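Editor's note: the new RangeMutex above admits concurrent holders on disjoint [address, address + size) ranges and blocks only when ranges overlap, which is what lets UpdatePagesCachedCount hold the lock for the whole walk without serializing unrelated regions. Below is a minimal standalone sketch of the same idea; it substitutes std::list for yuzu's Common::IntrusiveList and drops the ScopedRangeLock RAII wrapper so it compiles on its own. The names here are illustrative, not part of this patch.

// Standalone sketch of the range-mutex technique, assuming std::list in
// place of Common::IntrusiveList. Threads holding disjoint ranges proceed
// concurrently; an overlapping Lock() waits until the conflict is released.
#include <condition_variable>
#include <cstdint>
#include <list>
#include <mutex>
#include <thread>

class RangeMutexSketch {
public:
    void Lock(std::uint64_t address, std::uint64_t size) {
        std::unique_lock lk{m_mutex};
        // Wait until no currently held range intersects ours.
        m_cv.wait(lk, [&] { return !Intersects(address, size); });
        m_ranges.push_back({address, size});
    }

    void Unlock(std::uint64_t address, std::uint64_t size) {
        {
            std::scoped_lock lk{m_mutex};
            m_ranges.remove_if(
                [&](const Range& r) { return r.address == address && r.size == size; });
        }
        m_cv.notify_all();
    }

private:
    struct Range {
        std::uint64_t address;
        std::uint64_t size;
    };

    // Caller must hold m_mutex. Closed-interval overlap test, mirroring
    // HasIntersectionLocked in the patch above.
    bool Intersects(std::uint64_t address, std::uint64_t size) const {
        const auto cur_last = address + size - 1;
        for (const auto& r : m_ranges) {
            const auto other_last = r.address + r.size - 1;
            if (address <= other_last && r.address <= cur_last) {
                return true;
            }
        }
        return false;
    }

    mutable std::mutex m_mutex;
    std::condition_variable m_cv;
    std::list<Range> m_ranges;
};

int main() {
    RangeMutexSketch mutex;
    // Disjoint ranges: neither thread blocks the other.
    std::thread a([&] {
        mutex.Lock(0x0000, 0x1000);
        mutex.Unlock(0x0000, 0x1000);
    });
    std::thread b([&] {
        mutex.Lock(0x2000, 0x1000); // no overlap with [0x0000, 0x1000)
        mutex.Unlock(0x2000, 0x1000);
    });
    a.join();
    b.join();
}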

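A second structural point that is easy to miss in the texture-cache hunks: gpu_page_table_storage now holds two page tables per GPU address space, interleaved so the regular table sits at index 2 * storage_id and the sparse table at 2 * storage_id + 1, which is why OnGPUASRegister now calls emplace_back twice and CreateChannel binds both pointers. A schematic sketch of that layout, with hypothetical names standing in for TextureCacheGPUMap and the cache's bookkeeping:

// Sketch of the interleaved per-address-space page-table storage, assuming
// a stand-in PageTable type; indices match the *storage_id * 2 and
// *storage_id * 2 + 1 accesses in the patch above.
#include <cstddef>
#include <vector>

struct PageTable {}; // stand-in for TextureCacheGPUMap

struct PageTableStorage {
    std::vector<PageTable> tables;

    // Called once per registered GPU address space (cf. OnGPUASRegister),
    // which appends two tables instead of one.
    std::size_t RegisterAddressSpace() {
        const std::size_t storage_id = tables.size() / 2;
        tables.emplace_back(); // regular page table, index 2 * storage_id
        tables.emplace_back(); // sparse page table, index 2 * storage_id + 1
        return storage_id;
    }

    PageTable& Regular(std::size_t storage_id) { return tables[storage_id * 2]; }
    PageTable& Sparse(std::size_t storage_id) { return tables[storage_id * 2 + 1]; }
};

int main() {
    PageTableStorage storage;
    const std::size_t id = storage.RegisterAddressSpace();
    PageTable& regular = storage.Regular(id); // index 0
    PageTable& sparse = storage.Sparse(id);   // index 1
    (void)regular;
    (void)sparse;
}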