-rw-r--r--   src/CMakeLists.txt                                |   1
-rw-r--r--   src/audio_core/CMakeLists.txt                     |   3
-rw-r--r--   src/core/CMakeLists.txt                           |   2
-rw-r--r--   src/input_common/CMakeLists.txt                   |   3
-rw-r--r--   src/video_core/memory_manager.cpp                 | 109
-rw-r--r--   src/video_core/memory_manager.h                   |  22
-rw-r--r--   src/video_core/rasterizer_interface.h             |   3
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer.cpp  |   7
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer.h    |   1
-rw-r--r--   src/video_core/renderer_vulkan/vk_rasterizer.cpp  |   7
-rw-r--r--   src/video_core/renderer_vulkan/vk_rasterizer.h    |   1
-rw-r--r--   src/video_core/texture_cache/image_base.cpp       |   3
-rw-r--r--   src/video_core/texture_cache/image_base.h         |  39
-rw-r--r--   src/video_core/texture_cache/texture_cache.h      | 419
-rw-r--r--   src/video_core/texture_cache/types.h              |   1
-rw-r--r--   src/video_core/texture_cache/util.cpp             |  24
-rw-r--r--   src/video_core/texture_cache/util.h               |   4
17 files changed, 582 insertions(+), 67 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 65a4922ea..f8ec8fea8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -82,6 +82,7 @@ else()
         -Werror=missing-declarations
         -Werror=missing-field-initializers
         -Werror=reorder
+        -Werror=sign-compare
         -Werror=switch
         -Werror=uninitialized
         -Werror=unused-function
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
     target_compile_options(audio_core PRIVATE
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=reorder
-        -Werror=sign-compare
         -Werror=shadow
         -Werror=unused-parameter
         -Werror=unused-variable
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 19b970981..b2b0dbe05 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -667,8 +667,6 @@ else()
     target_compile_options(core PRIVATE
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=sign-compare
         -Werror=shadow
 
         $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index c3423c815..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -44,10 +44,7 @@ else()
         -Werror
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=reorder
         -Werror=shadow
-        -Werror=sign-compare
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
         -Werror=unused-variable
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..d2b9d5f2b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     } else {
         UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
     }
-    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
-    ASSERT(cpu_addr);
-    rasterizer->UnmapMemory(*cpu_addr, size);
+    const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
+
+    for (const auto& map : submapped_ranges) {
+        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+        ASSERT(cpu_addr);
+
+        rasterizer->UnmapMemory(*cpu_addr, map.second);
+    }
 
     UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
 }
@@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
     //// Lock the new page
     // TryLockPage(page_entry, size);
+    auto& current_page = page_table[PageEntryIndex(gpu_addr)];
 
-    page_table[PageEntryIndex(gpu_addr)] = page_entry;
+    if ((!current_page.IsValid() && page_entry.IsValid()) ||
+        current_page.ToAddress() != page_entry.ToAddress()) {
+        rasterizer->ModifyGPUMemory(gpu_addr, size);
+    }
+
+    current_page = page_entry;
 }
 
 std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
     return page_entry.ToAddress() + (gpu_addr & page_mask);
 }
 
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+    size_t page_index{addr >> page_bits};
+    const size_t page_last{(addr + size + page_size - 1) >> page_bits};
+    while (page_index < page_last) {
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (page_addr && *page_addr != 0) {
+            return page_addr;
+        }
+        ++page_index;
+    }
+    return std::nullopt;
+}
+
 template <typename T>
 T MemoryManager::Read(GPUVAddr addr) const {
     if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::PAGE_SIZE;
 }
 
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+    size_t page_index{gpu_addr >> page_bits};
+    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+    std::optional<VAddr> old_page_addr{};
+    while (page_index != page_last) {
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr || *page_addr == 0) {
+            return false;
+        }
+        if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                return false;
+            }
+        }
+        old_page_addr = page_addr;
+        ++page_index;
+    }
+    return true;
+}
+
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+    size_t page_index{gpu_addr >> page_bits};
+    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+    while (page_index < page_last) {
+        if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
+            return false;
+        }
+        ++page_index;
+    }
+    return true;
+}
+
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+    GPUVAddr gpu_addr, std::size_t size) const {
+    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+    size_t page_index{gpu_addr >> page_bits};
+    size_t remaining_size{size};
+    size_t page_offset{gpu_addr & page_mask};
+    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+    std::optional<VAddr> old_page_addr{};
+    const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = page_index << page_bits;
+            last_segment = {new_base_addr, bytes};
+        } else {
+            last_segment->second += bytes;
+        }
+    };
+    const auto split = [this, &last_segment, &result] {
+        if (last_segment) {
+            result.push_back(*last_segment);
+            last_segment = std::nullopt;
+        }
+    };
+    while (remaining_size > 0) {
+        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr) {
+            split();
+        } else if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                split();
+            }
+            extend_size(num_bytes);
+        } else {
+            extend_size(num_bytes);
+        }
+        old_page_addr = page_addr;
+        ++page_index;
+        page_offset = 0;
+        remaining_size -= num_bytes;
+    }
+    split();
+    return result;
+}
+
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
+    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
     template <typename T>
     [[nodiscard]] T Read(GPUVAddr addr) const;
 
@@ -112,10 +114,28 @@ public:
     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
     /**
-     * IsGranularRange checks if a gpu region can be simply read with a pointer.
+     * Checks if a gpu region can be simply read with a pointer.
      */
     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
+    /**
+     * Checks if a gpu region is mapped by a single range of cpu addresses.
+     */
+    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Checks if a gpu region is mapped entirely.
+     */
+    [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Returns a vector with all the subranges of cpu addresses mapped beneath the region.
+     * If the region is continuous, a single pair is returned. If it is unmapped, an empty
+     * vector is returned.
+     */
+    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
+                                                                    std::size_t size) const;
+
     [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
     [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
     [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
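The split/extend walk in GetSubmappedRange above is the core of the sparse-mapping support: it carves a GPU range into maximal segments whose backing CPU pages are contiguous. As a rough illustration, here is a minimal standalone sketch of the same idea over a toy page table (all names and values below are hypothetical, not yuzu code):

#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

// Toy model: page i maps to a CPU address or to nothing (a hypothetical
// stand-in for MemoryManager's page table).
constexpr std::size_t page_size = 0x1000;
using CpuAddr = std::uint64_t;
const std::vector<std::optional<CpuAddr>> toy_pages = {
    0x10000, 0x11000, std::nullopt, 0x20000, 0x21000, // two mapped runs
};

// Split [0, toy_pages.size() * page_size) into maximal runs of pages that
// are mapped and CPU-contiguous, mirroring the split/extend logic above.
std::vector<std::pair<std::size_t, std::size_t>> SubmappedRanges() {
    std::vector<std::pair<std::size_t, std::size_t>> result;
    std::optional<std::pair<std::size_t, std::size_t>> segment;
    std::optional<CpuAddr> prev;
    for (std::size_t page = 0; page < toy_pages.size(); ++page) {
        const std::optional<CpuAddr> addr = toy_pages[page];
        const bool contiguous = addr && (!prev || *prev + page_size == *addr);
        if (!addr || !contiguous) { // split: close the open segment
            if (segment) {
                result.push_back(*segment);
                segment.reset();
            }
        }
        if (addr) { // extend: open a new segment or grow the current one
            if (!segment) {
                segment = {page * page_size, 0};
            }
            segment->second += page_size;
        }
        prev = addr;
    }
    if (segment) {
        result.push_back(*segment);
    }
    return result;
}

int main() {
    // Prints two segments: offset 0x0 size 0x2000 and offset 0x3000 size 0x2000.
    for (const auto& [offset, size] : SubmappedRanges()) {
        std::printf("segment at 0x%zx, size 0x%zx\n", offset, size);
    }
}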
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..0cec4225b 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
     /// Unmap memory range
     virtual void UnmapMemory(VAddr addr, u64 size) = 0;
 
+    /// Remap a GPU memory range. This means the underlying backing memory has changed.
+    virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
 }
 
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    {
+        std::scoped_lock lock{texture_cache.mutex};
+        texture_cache.UnmapGPUMemory(addr, size);
+    }
+}
+
 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
+    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
+void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    {
+        std::scoped_lock lock{texture_cache.mutex};
+        texture_cache.UnmapGPUMemory(addr, size);
+    }
+}
+
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
+    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
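Both backends implement the new hook the same way: take the texture cache's lock, then forward the range. A reduced, compilable sketch of that dispatch shape (ToyRasterizer and ToyTextureCache are stand-ins, not the real classes):

#include <cstdint>
#include <cstdio>
#include <mutex>

using GPUVAddr = std::uint64_t;

// Reduced shape of the interface change: a pure-virtual hook that backends
// override to invalidate GPU-addressed caches when backing memory is remapped.
struct RasterizerInterface {
    virtual ~RasterizerInterface() = default;
    virtual void ModifyGPUMemory(GPUVAddr addr, std::uint64_t size) = 0;
};

struct ToyTextureCache {
    std::mutex mutex;
    void UnmapGPUMemory(GPUVAddr addr, std::uint64_t size) {
        std::printf("unmap 0x%llx..0x%llx\n", static_cast<unsigned long long>(addr),
                    static_cast<unsigned long long>(addr + size));
    }
};

struct ToyRasterizer final : RasterizerInterface {
    ToyTextureCache texture_cache;
    void ModifyGPUMemory(GPUVAddr addr, std::uint64_t size) override {
        // The cache is shared with other threads, so take its lock before
        // touching it, as both the OpenGL and Vulkan backends do above.
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.UnmapGPUMemory(addr, size);
    }
};

int main() {
    ToyRasterizer rasterizer;
    static_cast<RasterizerInterface&>(rasterizer).ModifyGPUMemory(0xdead0000, 0x2000);
}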
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index f22358c90..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
     }
 }
 
+ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
+    : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
+
 std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
     if (other_addr < gpu_addr) {
         // Subresource address can't be lower than the base
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 {
     Strong = 1 << 5,      ///< Exists in the image table, the dimensions can be trusted
     Registered = 1 << 6,  ///< True when the image is registered
     Picked = 1 << 7,      ///< Temporary flag to mark the image as picked
+    Remapped = 1 << 8,    ///< Image has been remapped.
+    Sparse = 1 << 9,      ///< Image has non-continuous submemory.
 
     // Garbage Collection Flags
-    BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
-                         ///< garbage collection priority
-    Alias = 1 << 9,      ///< This image has aliases and has priority on garbage
-                         ///< collection
+    BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+                          ///< garbage collection priority
+    Alias = 1 << 11,      ///< This image has aliases and has priority on garbage
+                          ///< collection
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 
@@ -57,6 +59,12 @@ struct ImageBase {
         return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
     }
 
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+        const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
+        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+    }
+
     void CheckBadOverlapState();
     void CheckAliasState();
 
@@ -84,6 +92,29 @@ struct ImageBase {
     std::vector<AliasedImage> aliased_images;
     std::vector<ImageId> overlapping_images;
+    ImageMapId map_view_id{};
 };
 
+struct ImageMapView {
+    explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
+
+    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+        const VAddr overlap_end = overlap_cpu_addr + overlap_size;
+        const VAddr cpu_addr_end = cpu_addr + size;
+        return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
+    }
+
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+        const GPUVAddr gpu_addr_end = gpu_addr + size;
+        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+    }
+
+    GPUVAddr gpu_addr;
+    VAddr cpu_addr;
+    size_t size;
+    ImageId image_id;
+    bool picked{};
+};
+
 struct ImageAllocBase {
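The Overlaps/OverlapsGPU predicates above are the standard half-open interval intersection test: two ranges [a, a+n) and [b, b+m) intersect iff each start lies below the other's end. A self-contained sketch with a few checks (addresses are arbitrary):

#include <cassert>
#include <cstdint>

using GPUVAddr = std::uint64_t;

// Half-open interval overlap, as in ImageMapView::OverlapsGPU above:
// [a, a + a_size) and [b, b + b_size) intersect iff each start is below the
// other range's end.
constexpr bool OverlapsGPU(GPUVAddr a, std::uint64_t a_size, GPUVAddr b,
                           std::uint64_t b_size) {
    return a < b + b_size && b < a + a_size;
}

int main() {
    static_assert(OverlapsGPU(0x1000, 0x100, 0x10f0, 0x100));  // partial overlap
    static_assert(!OverlapsGPU(0x1000, 0x100, 0x1100, 0x100)); // adjacent, no overlap
    assert(OverlapsGPU(0x2000, 0x10, 0x2000, 0x10));           // identical ranges
}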
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8dbd3824..e3542301e 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
 #include <span>
 #include <type_traits>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -152,6 +153,9 @@ public:
     /// Remove images in a region
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
+    /// Mark images in a GPU region as remapped
+    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src,
@@ -190,7 +194,22 @@ public:
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
-    static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    template <typename Func>
+    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
         const u64 page_end = (addr + size - 1) >> PAGE_BITS;
         for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -220,7 +239,7 @@ private:
     FramebufferId GetFramebufferId(const RenderTargets& key);
 
     /// Refresh the contents (pixel data) of an image
-    void RefreshContents(Image& image);
+    void RefreshContents(Image& image, ImageId image_id);
 
     /// Upload data from guest to an image
     template <typename StagingBuffer>
@@ -269,6 +288,16 @@ private:
     template <typename Func>
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
 
+    template <typename Func>
+    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    template <typename Func>
+    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    /// Iterates over all the CPU-mapped segments of a sparse image, calling func on each one
+    template <typename Func>
+    void ForEachSparseSegment(ImageBase& image, Func&& func);
+
     /// Find or create an image view in the given image with the passed parameters
     [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
 
@@ -279,10 +308,10 @@ private:
     void UnregisterImage(ImageId image);
 
     /// Track CPU reads and writes for image
-    void TrackImage(ImageBase& image);
+    void TrackImage(ImageBase& image, ImageId image_id);
 
     /// Stop tracking CPU reads and writes for image
-    void UntrackImage(ImageBase& image);
+    void UntrackImage(ImageBase& image, ImageId image_id);
 
     /// Delete image from the cache
     void DeleteImage(ImageId image);
@@ -340,7 +369,13 @@ private:
     std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+    VAddr virtual_invalid_space{};
 
     bool has_deleted_images = false;
     u64 total_used_memory = 0;
@@ -349,6 +384,7 @@ private:
     u64 critical_memory;
 
     SlotVector<Image> slot_images;
+    SlotVector<ImageMapView> slot_map_views;
     SlotVector<ImageView> slot_image_views;
     SlotVector<ImageAlloc> slot_image_allocs;
     SlotVector<Sampler> slot_samplers;
@@ -459,7 +495,7 @@ void TextureCache<P>::RunGarbageCollector() {
                 }
             }
             if (True(image->flags & ImageFlagBits::Tracked)) {
-                UntrackImage(*image);
+                UntrackImage(*image, image_id);
             }
             UnregisterImage(image_id);
             DeleteImage(image_id);
@@ -658,7 +694,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
             return;
         }
         image.flags |= ImageFlagBits::CpuModified;
-        UntrackImage(image);
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, image_id);
+        }
     });
 }
 
@@ -695,7 +733,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
         if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image);
+            UntrackImage(image, id);
         }
         UnregisterImage(id);
         DeleteImage(id);
@@ -703,6 +741,23 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
 }
 
 template <class P>
+void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+    std::vector<ImageId> deleted_images;
+    ForEachImageInRegionGPU(gpu_addr, size,
+                            [&](ImageId id, Image&) { deleted_images.push_back(id); });
+    for (const ImageId id : deleted_images) {
+        Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Remapped)) {
+            continue;
+        }
+        image.flags |= ImageFlagBits::Remapped;
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, id);
+        }
+    }
+}
+
+template <class P>
 void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                                 const Tegra::Engines::Fermi2D::Surface& src,
                                 const Tegra::Engines::Fermi2D::Config& copy,
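Note that UnmapGPUMemory does not delete images; it only flags them Remapped and stops CPU tracking, so later lookups skip them until the data is rebuilt. A tiny sketch of the flag pattern (the operators approximate what DECLARE_ENUM_FLAG_OPERATORS generates; flag values copied from image_base.h above):

#include <cstdint>
#include <cstdio>

// Minimal stand-in for the flag-enum pattern; True() mirrors yuzu's helper.
enum class ImageFlagBits : std::uint32_t {
    Tracked = 1 << 4,
    Remapped = 1 << 8,
};
constexpr ImageFlagBits operator|(ImageFlagBits a, ImageFlagBits b) {
    return static_cast<ImageFlagBits>(static_cast<std::uint32_t>(a) |
                                      static_cast<std::uint32_t>(b));
}
constexpr ImageFlagBits operator&(ImageFlagBits a, ImageFlagBits b) {
    return static_cast<ImageFlagBits>(static_cast<std::uint32_t>(a) &
                                      static_cast<std::uint32_t>(b));
}
constexpr bool True(ImageFlagBits f) { return static_cast<std::uint32_t>(f) != 0; }

int main() {
    ImageFlagBits flags{};
    flags = flags | ImageFlagBits::Remapped;     // mark once, as UnmapGPUMemory does
    if (True(flags & ImageFlagBits::Remapped)) { // later passes skip the image
        std::printf("already remapped, skipping\n");
    }
}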
@@ -833,9 +888,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
     if (it == page_table.end()) {
         return nullptr;
     }
-    const auto& image_ids = it->second;
-    for (const ImageId image_id : image_ids) {
-        const ImageBase& image = slot_images[image_id];
+    const auto& image_map_ids = it->second;
+    for (const ImageMapId map_id : image_map_ids) {
+        const ImageMapView& map = slot_map_views[map_id];
+        const ImageBase& image = slot_images[map.image_id];
         if (image.cpu_addr != cpu_addr) {
             continue;
         }
@@ -915,13 +971,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
     if (False(image.flags & ImageFlagBits::CpuModified)) {
         // Only upload modified images
         return;
     }
     image.flags &= ~ImageFlagBits::CpuModified;
-    TrackImage(image);
+    TrackImage(image, image_id);
 
     if (image.info.num_samples > 1) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -958,7 +1014,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidAddress(gpu_memory, config)) {
+    if (!IsValidEntry(gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
     const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1000,14 +1056,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        return ImageId{};
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        if (!cpu_addr) {
+            return ImageId{};
+        }
     }
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+            return false;
+        }
         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
             const bool strict_size = False(options & RelaxedOptions::Size) &&
                                      True(existing_image.flags & ImageFlagBits::Strong);
@@ -1033,7 +1095,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
 
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        const auto size = CalculateGuestSizeInBytes(info);
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        if (!cpu_addr) {
+            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+            virtual_invalid_space += Common::AlignUp(size, 32);
+            cpu_addr = std::optional<VAddr>(fake_addr);
+        }
+    }
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
     const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
     const Image& image = slot_images[image_id];
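When an image's GPU range has no CPU backing at all, InsertImage above still needs a unique CPU-side key for the cache's page tables, so it hands out fake addresses from the top of the address space. A standalone sketch of just that allocation (constants copied from the diff; the helper name is hypothetical):

#include <cstdint>
#include <cstdio>

using VAddr = std::uint64_t;

// Bump allocator for fake CPU addresses, mirroring the fallback in
// InsertImage: start just below the top of the 40-bit space and advance by
// the guest size aligned up to 32 bytes, so no two images share a key.
VAddr virtual_invalid_space = 0;

VAddr AllocateFakeAddress(std::size_t guest_size_bytes) {
    const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
    virtual_invalid_space += (guest_size_bytes + 31) & ~std::size_t{31}; // AlignUp(size, 32)
    return fake_addr;
}

int main() {
    // Two allocations yield distinct, 32-byte separated fake addresses.
    std::printf("0x%llx\n", static_cast<unsigned long long>(AllocateFakeAddress(0x20)));
    std::printf("0x%llx\n", static_cast<unsigned long long>(AllocateFakeAddress(0x20)));
}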
@@ -1053,10 +1124,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     std::vector<ImageId> overlap_ids;
+    std::unordered_set<ImageId> overlaps_found;
     std::vector<ImageId> left_aliased_ids;
     std::vector<ImageId> right_aliased_ids;
+    std::unordered_set<ImageId> ignore_textures;
     std::vector<ImageId> bad_overlap_ids;
-    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+    const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (True(overlap.flags & ImageFlagBits::Remapped)) {
+            ignore_textures.insert(overlap_id);
+            return;
+        }
         if (info.type == ImageType::Linear) {
             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
                 // Alias linear images with the same pitch
@@ -1064,6 +1141,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             }
             return;
         }
+        overlaps_found.insert(overlap_id);
         static constexpr bool strict_size = true;
         const std::optional<OverlapResult> solution = ResolveOverlap(
             new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1087,12 +1165,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             bad_overlap_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::BadOverlap;
         }
-    });
+    };
+    ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+    const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (!overlaps_found.contains(overlap_id)) {
+            if (True(overlap.flags & ImageFlagBits::Remapped)) {
+                ignore_textures.insert(overlap_id);
+            }
+            if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
+                ignore_textures.insert(overlap_id);
+            }
+        }
+    };
+    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
+    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+        new_image.flags |= ImageFlagBits::Sparse;
+    }
+
+    for (const ImageId overlap_id : ignore_textures) {
+        Image& overlap = slot_images[overlap_id];
+        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+            UNIMPLEMENTED();
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap, overlap_id);
+        }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
+    }
+
     // TODO: Only upload what we need
-    RefreshContents(new_image);
+    RefreshContents(new_image, new_image_id);
 
     for (const ImageId overlap_id : overlap_ids) {
         Image& overlap = slot_images[overlap_id];
@@ -1104,7 +1210,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             runtime.CopyImage(new_image, overlap, copies);
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(overlap);
+            UntrackImage(overlap, overlap_id);
         }
         UnregisterImage(overlap_id);
         DeleteImage(overlap_id);
@@ -1239,7 +1345,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 32> images;
-    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+    boost::container::small_vector<ImageMapId, 32> maps;
+    ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
         const auto it = page_table.find(page);
         if (it == page_table.end()) {
             if constexpr (BOOL_BREAK) {
@@ -1248,12 +1355,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
                 return;
             }
         }
+        for (const ImageMapId map_id : it->second) {
+            ImageMapView& map = slot_map_views[map_id];
+            if (map.picked) {
+                continue;
+            }
+            if (!map.Overlaps(cpu_addr, size)) {
+                continue;
+            }
+            map.picked = true;
+            maps.push_back(map_id);
+            Image& image = slot_images[map.image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(map.image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(map.image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(map.image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+    for (const ImageMapId map_id : maps) {
+        slot_map_views[map_id].picked = false;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 8> images;
+    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+        const auto it = gpu_page_table.find(page);
+        if (it == gpu_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
+        for (const ImageId image_id : it->second) {
+            Image& image = slot_images[image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            if (!image.OverlapsGPU(gpu_addr, size)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 8> images;
+    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+        const auto it = sparse_page_table.find(page);
+        if (it == sparse_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
         for (const ImageId image_id : it->second) {
             Image& image = slot_images[image_id];
             if (True(image.flags & ImageFlagBits::Picked)) {
                 continue;
             }
-            if (!image.Overlaps(cpu_addr, size)) {
+            if (!image.OverlapsGPU(gpu_addr, size)) {
                 continue;
             }
             image.flags |= ImageFlagBits::Picked;
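All three region iterators share one pattern: an entry can sit in several page buckets, so it is marked Picked on first visit and every mark is cleared after the walk, guaranteeing each image is visited exactly once. A toy version of that deduplicating walk:

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

// Sketch of the "Picked" dedup pattern used by ForEachImageInRegion above.
// Toy data; not the cache's real structures.
struct ToyImage {
    bool picked = false;
    int payload = 0;
};

int main() {
    std::vector<ToyImage> images{{false, 1}, {false, 2}};
    // Image 0 spans two pages, so it appears in both buckets.
    std::unordered_map<std::uint64_t, std::vector<int>> page_table{{0, {0}}, {1, {0, 1}}};

    std::vector<int> visited;
    for (std::uint64_t page = 0; page <= 1; ++page) {
        for (const int id : page_table[page]) {
            if (images[id].picked) {
                continue; // already visited through another page bucket
            }
            images[id].picked = true;
            visited.push_back(id);
            std::printf("visit image %d (payload %d)\n", id, images[id].payload);
        }
    }
    for (const int id : visited) {
        images[id].picked = false; // restore the marks for the next query
    }
}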
@@ -1276,6 +1476,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
 }
 
 template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
+    static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
+    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    for (auto& segment : segments) {
+        const auto gpu_addr = segment.first;
+        const auto size = segment.second;
+        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        ASSERT(cpu_addr);
+        if constexpr (RETURNS_BOOL) {
+            if (func(gpu_addr, *cpu_addr, size)) {
+                break;
+            }
+        } else {
+            func(gpu_addr, *cpu_addr, size);
+        }
+    }
+}
+
+template <class P>
 ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
     Image& image = slot_images[image_id];
     if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1292,8 +1513,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
                "Trying to register an already registered image");
     image.flags |= ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.guest_size_bytes,
-                [this, image_id](u64 page) { page_table[page].push_back(image_id); });
     u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
     if ((IsPixelFormatASTC(image.info.format) &&
          True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1301,6 +1520,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory += Common::AlignUp(tentative_size, 1024);
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        auto map_id =
+            slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
+        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
+                       [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+        image.map_view_id = map_id;
+        return;
+    }
+    std::vector<ImageViewId> sparse_maps{};
+    ForEachSparseSegment(
+        image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+            auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
+            ForEachCPUPage(cpu_addr, size,
+                           [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+            sparse_maps.push_back(map_id);
+        });
+    sparse_views.emplace(image_id, std::move(sparse_maps));
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
 }
 
 template <class P>
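RegisterImage now takes two paths: a dense image gets a single ImageMapView spanning its whole CPU range, while a sparse image gets one map view per CPU-contiguous segment plus an entry in sparse_page_table. A toy model of that split (all types below are stand-ins, not the cache's real structures):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct MapView {
    std::uint64_t cpu_addr;
    std::size_t size;
    int image_id;
};

// Dense: one view covering the whole range (assumes one contiguous segment).
// Sparse: one view per CPU-contiguous segment, as RegisterImage does above.
std::vector<MapView> RegisterToy(
    int image_id, bool sparse,
    const std::vector<std::pair<std::uint64_t, std::size_t>>& segments) {
    std::vector<MapView> views;
    if (!sparse) {
        views.push_back({segments.front().first, segments.front().second, image_id});
        return views;
    }
    for (const auto& [addr, size] : segments) {
        views.push_back({addr, size, image_id});
    }
    return views;
}

int main() {
    const auto views = RegisterToy(7, /*sparse=*/true, {{0x1000, 0x2000}, {0x8000, 0x1000}});
    for (const auto& v : views) {
        std::printf("map view: cpu=0x%llx size=0x%zx image=%d\n",
                    static_cast<unsigned long long>(v.cpu_addr), v.size, v.image_id);
    }
}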
@@ -1317,34 +1557,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
-    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
-        const auto page_it = page_table.find(page);
-        if (page_it == page_table.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-            return;
-        }
-        std::vector<ImageId>& image_ids = page_it->second;
-        const auto vector_it = std::ranges::find(image_ids, image_id);
-        if (vector_it == image_ids.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
-            return;
-        }
-        image_ids.erase(vector_it);
+    const auto& clear_page_table =
+        [this, image_id](
+            u64 page,
+            std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+            const auto page_it = selected_page_table.find(page);
+            if (page_it == selected_page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageId>& image_ids = page_it->second;
+            const auto vector_it = std::ranges::find(image_ids, image_id);
+            if (vector_it == image_ids.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+                                page << PAGE_BITS);
+                return;
+            }
+            image_ids.erase(vector_it);
+        };
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        const auto map_id = image.map_view_id;
+        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
+            const auto page_it = page_table.find(page);
+            if (page_it == page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageMapId>& image_map_ids = page_it->second;
+            const auto vector_it = std::ranges::find(image_map_ids, map_id);
+            if (vector_it == image_map_ids.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+                                page << PAGE_BITS);
+                return;
+            }
+            image_map_ids.erase(vector_it);
+        });
+        slot_map_views.erase(map_id);
+        return;
+    }
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, sparse_page_table);
     });
+    auto it = sparse_views.find(image_id);
+    ASSERT(it != sparse_views.end());
+    auto& sparse_maps = it->second;
+    for (auto& map_view_id : sparse_maps) {
+        const auto& map_range = slot_map_views[map_view_id];
+        const VAddr cpu_addr = map_range.cpu_addr;
+        const std::size_t size = map_range.size;
+        ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
+            const auto page_it = page_table.find(page);
+            if (page_it == page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageMapId>& image_map_ids = page_it->second;
+            auto vector_it = image_map_ids.begin();
+            while (vector_it != image_map_ids.end()) {
+                ImageMapView& map = slot_map_views[*vector_it];
+                if (map.image_id != image_id) {
+                    vector_it++;
+                    continue;
+                }
+                if (!map.picked) {
+                    map.picked = true;
+                }
+                vector_it = image_map_ids.erase(vector_it);
+            }
+        });
+        slot_map_views.erase(map_view_id);
+    }
+    sparse_views.erase(it);
 }
 
 template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image) {
+void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(False(image.flags & ImageFlagBits::Tracked));
     image.flags |= ImageFlagBits::Tracked;
-    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+        return;
+    }
+    if (True(image.flags & ImageFlagBits::Registered)) {
+        auto it = sparse_views.find(image_id);
+        ASSERT(it != sparse_views.end());
+        auto& sparse_maps = it->second;
+        for (auto& map_view_id : sparse_maps) {
+            const auto& map = slot_map_views[map_view_id];
+            const VAddr cpu_addr = map.cpu_addr;
+            const std::size_t size = map.size;
+            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        }
+        return;
+    }
+    ForEachSparseSegment(image,
+                         [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+                         });
 }
 
 template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image) {
+void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(True(image.flags & ImageFlagBits::Tracked));
     image.flags &= ~ImageFlagBits::Tracked;
-    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+    if (False(image.flags & ImageFlagBits::Sparse)) {
+        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+        return;
+    }
+    ASSERT(True(image.flags & ImageFlagBits::Registered));
+    auto it = sparse_views.find(image_id);
+    ASSERT(it != sparse_views.end());
+    auto& sparse_maps = it->second;
+    for (auto& map_view_id : sparse_maps) {
+        const auto& map = slot_map_views[map_view_id];
+        const VAddr cpu_addr = map.cpu_addr;
+        const std::size_t size = map.size;
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+    }
 }
 
 template <class P>
@@ -1486,10 +1817,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
     if (invalidate) {
         image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
         if (False(image.flags & ImageFlagBits::Tracked)) {
-            TrackImage(image);
+            TrackImage(image, image_id);
         }
     } else {
-        RefreshContents(image);
+        RefreshContents(image, image_id);
         SynchronizeAliases(image_id);
     }
     if (is_modification) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..9fbdc1ac6 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
 constexpr SlotId CORRUPT_ID{0xfffffffe};
 
 using ImageId = SlotId;
+using ImageMapId = SlotId;
 using ImageViewId = SlotId;
 using ImageAllocId = SlotId;
 using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 20794fa32..c872517b8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
     return offsets;
 }
 
+LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
+    const u32 num_levels = info.resources.levels;
+    const LevelInfo level_info = MakeLevelInfo(info);
+    LevelArray sizes{};
+    for (u32 level = 0; level < num_levels; ++level) {
+        sizes[level] = CalculateLevelSize(level_info, level);
+    }
+    return sizes;
+}
+
 std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
     ASSERT(info.type == ImageType::e3D);
     std::vector<u32> offsets;
@@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
     return copies;
 }
 
-bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
-    if (config.Address() == 0) {
+bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+    const GPUVAddr address = config.Address();
+    if (address == 0) {
         return false;
     }
-    if (config.Address() > (u64(1) << 48)) {
+    if (address > (1ULL << 48)) {
         return false;
     }
-    return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
+    if (gpu_memory.GpuToCpuAddress(address).has_value()) {
+        return true;
+    }
+    const ImageInfo info{config};
+    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+    return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
 }
 
 std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
 
 [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
 
+[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
+
 [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
 
 [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,7 @@ struct OverlapResult {
                                                            const ImageInfo& src,
                                                            SubresourceBase base);
 
-[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
 
 [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
                                                           GPUVAddr gpu_addr, const ImageInfo& info,
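For intuition, level N of a mip chain covers the base extent shifted right by N, clamped to 1; the real CalculateLevelSize in util.cpp also accounts for block-linear alignment, which this sketch deliberately ignores (assumed linear layout, toy code):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Toy mip-level size calculation in the spirit of CalculateMipLevelSizes:
// each level halves the width and height, clamped to 1 texel.
std::uint32_t LevelBytes(std::uint32_t width, std::uint32_t height,
                         std::uint32_t bytes_per_pixel, std::uint32_t level) {
    const std::uint32_t w = std::max<std::uint32_t>(width >> level, 1);
    const std::uint32_t h = std::max<std::uint32_t>(height >> level, 1);
    return w * h * bytes_per_pixel;
}

int main() {
    // A 64x64 RGBA8 image: 16384, 4096, 1024, 256 bytes for levels 0..3.
    for (std::uint32_t level = 0; level < 4; ++level) {
        std::printf("level %u: %u bytes\n", level, LevelBytes(64, 64, 4, level));
    }
}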
