| | | |
|---|---|---|
| author | Rodrigo Locatti <reinuseslisp@airmail.cc> | 2020-04-09 17:59:21 -0300 |
| committer | GitHub <noreply@github.com> | 2020-04-09 17:59:21 -0300 |
| commit | 36f607217fa9172d2e1b76e327fdb03b0498ae4d (patch) | |
| tree | 523309af11b2e7f47f3f6060fad1d86e9d446206 /src/video_core | |
| parent | b96fd0bd0e562770399441c8164069d2437f00e7 (diff) | |
| parent | 7cd6daf11524268afad1fc51af849d91cf7df01b (diff) | |
Merge pull request #3610 from FernandoS27/gpu-caches
Refactor all the GPU Caches to use VAddr for cache addressing
Diffstat (limited to 'src/video_core')
30 files changed, 374 insertions, 424 deletions
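In short, the merge re-keys every GPU-side cache from host pointers (`CacheAddr`, derived from `GetPointer`) to guest virtual addresses (`VAddr`, obtained via `MemoryManager::GpuToCpuAddress`), and routes uploads that are not backed by one contiguous host page through a staging buffer guarded by the new `MemoryManager::IsGranularRange` helper. The sketch below is a minimal, self-contained illustration of that lookup-and-upload pattern, not the actual yuzu implementation: `SimpleMemoryManager`, `BufferCacheSketch`, `Buffer`, and `kPageSize` are hypothetical stand-ins, while `GpuToCpuAddress`, `IsGranularRange`, `GetPointer`, and `ReadBlockUnsafe` mirror the calls the diff actually uses.

```cpp
// Simplified sketch of the VAddr-keyed cache flow after this merge.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <unordered_map>
#include <vector>

using u8 = std::uint8_t;
using u64 = std::uint64_t;
using VAddr = u64;    // guest CPU virtual address: the new cache key
using GPUVAddr = u64; // GPU virtual address

constexpr std::size_t kPageSize = 0x1000;

// Hypothetical stand-in: one identity-mapped region backed by host memory.
class SimpleMemoryManager {
public:
    explicit SimpleMemoryManager(std::size_t bytes) : backing(bytes, u8{0}) {}

    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) const {
        if (gpu_addr >= backing.size()) {
            return std::nullopt; // unmapped GPU address
        }
        return static_cast<VAddr>(gpu_addr); // identity mapping for the sketch
    }

    // True when the range stays inside one page and can be read via a pointer.
    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
        return (gpu_addr % kPageSize) + size <= kPageSize;
    }

    u8* GetPointer(GPUVAddr gpu_addr) { return backing.data() + gpu_addr; }

    void ReadBlockUnsafe(GPUVAddr gpu_addr, void* dest, std::size_t size) {
        std::memcpy(dest, backing.data() + gpu_addr, size);
    }

private:
    std::vector<u8> backing;
};

struct Buffer {
    std::vector<u8> data; // host copy standing in for a GL/Vulkan buffer
};

class BufferCacheSketch {
public:
    explicit BufferCacheSketch(SimpleMemoryManager& mm) : memory_manager{mm} {}

    // Returns the cached buffer for gpu_addr, uploading its data on a miss.
    Buffer* Upload(GPUVAddr gpu_addr, std::size_t size) {
        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            return nullptr; // unmapped, analogous to the GetEmptyBuffer path
        }

        // Caches are now keyed by the guest VAddr instead of a host pointer.
        auto [it, inserted] = blocks.try_emplace(*cpu_addr);
        if (!inserted) {
            return &it->second; // cache hit
        }

        // Granular ranges can be read through one host pointer; otherwise the
        // data is gathered into a staging buffer first, as in the patch.
        if (memory_manager.IsGranularRange(gpu_addr, size)) {
            const u8* host_ptr = memory_manager.GetPointer(gpu_addr);
            it->second.data.assign(host_ptr, host_ptr + size);
        } else {
            staging_buffer.resize(size);
            memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
            it->second.data = staging_buffer;
        }
        return &it->second;
    }

private:
    SimpleMemoryManager& memory_manager;
    std::unordered_map<VAddr, Buffer> blocks;
    std::vector<u8> staging_buffer;
};

int main() {
    SimpleMemoryManager mm(4 * kPageSize);
    BufferCacheSketch cache(mm);
    Buffer* small = cache.Upload(0x100, 0x200);  // within one page: direct pointer
    Buffer* large = cache.Upload(0xF00, 0x2000); // crosses pages: staged read
    std::cout << (small != nullptr) << ' ' << (large != nullptr) << '\n';
}
```

Keying on `VAddr` is also what lets the rasterizer's flush and invalidate callbacks operate directly on guest addresses, which is the point of the `FlushRegion`/`InvalidateRegion`/`FlushAndInvalidateRegion` signature changes that run through the rest of the diff.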
| diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index 4b9193182..e35ee0b67 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -15,37 +15,29 @@ namespace VideoCommon {  class BufferBlock {  public: -    bool Overlaps(const CacheAddr start, const CacheAddr end) const { -        return (cache_addr < end) && (cache_addr_end > start); +    bool Overlaps(const VAddr start, const VAddr end) const { +        return (cpu_addr < end) && (cpu_addr_end > start);      } -    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { -        return cache_addr <= other_start && other_end <= cache_addr_end; +    bool IsInside(const VAddr other_start, const VAddr other_end) const { +        return cpu_addr <= other_start && other_end <= cpu_addr_end;      } -    u8* GetWritableHostPtr() const { -        return FromCacheAddr(cache_addr); +    std::size_t GetOffset(const VAddr in_addr) { +        return static_cast<std::size_t>(in_addr - cpu_addr);      } -    u8* GetWritableHostPtr(std::size_t offset) const { -        return FromCacheAddr(cache_addr + offset); +    VAddr GetCpuAddr() const { +        return cpu_addr;      } -    std::size_t GetOffset(const CacheAddr in_addr) { -        return static_cast<std::size_t>(in_addr - cache_addr); +    VAddr GetCpuAddrEnd() const { +        return cpu_addr_end;      } -    CacheAddr GetCacheAddr() const { -        return cache_addr; -    } - -    CacheAddr GetCacheAddrEnd() const { -        return cache_addr_end; -    } - -    void SetCacheAddr(const CacheAddr new_addr) { -        cache_addr = new_addr; -        cache_addr_end = new_addr + size; +    void SetCpuAddr(const VAddr new_addr) { +        cpu_addr = new_addr; +        cpu_addr_end = new_addr + size;      }      std::size_t GetSize() const { @@ -61,14 +53,14 @@ public:      }  protected: -    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { -        SetCacheAddr(cache_addr); +    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { +        SetCpuAddr(cpu_addr);      }      ~BufferBlock() = default;  private: -    CacheAddr cache_addr{}; -    CacheAddr cache_addr_end{}; +    VAddr cpu_addr{}; +    VAddr cpu_addr_end{};      std::size_t size{};      u64 epoch{};  }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 186aca61d..b57c0d4d4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -19,6 +19,7 @@  #include "common/alignment.h"  #include "common/common_types.h"  #include "core/core.h" +#include "core/memory.h"  #include "video_core/buffer_cache/buffer_block.h"  #include "video_core/buffer_cache/map_interval.h"  #include "video_core/memory_manager.h" @@ -37,28 +38,45 @@ public:                              bool is_written = false, bool use_fast_cbuf = false) {          std::lock_guard lock{mutex}; -        auto& memory_manager = system.GPU().MemoryManager(); -        const auto host_ptr = memory_manager.GetPointer(gpu_addr); -        if (!host_ptr) { +        const std::optional<VAddr> cpu_addr_opt = +            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + +        if (!cpu_addr_opt) {              return {GetEmptyBuffer(size), 0};          } -        const auto cache_addr = ToCacheAddr(host_ptr); + +        VAddr cpu_addr = *cpu_addr_opt;          // Cache management is a big overhead, so only 
cache entries with a given size.          // TODO: Figure out which size is the best for given games.          constexpr std::size_t max_stream_size = 0x800;          if (use_fast_cbuf || size < max_stream_size) { -            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { +            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { +                auto& memory_manager = system.GPU().MemoryManager();                  if (use_fast_cbuf) { -                    return ConstBufferUpload(host_ptr, size); +                    if (memory_manager.IsGranularRange(gpu_addr, size)) { +                        const auto host_ptr = memory_manager.GetPointer(gpu_addr); +                        return ConstBufferUpload(host_ptr, size); +                    } else { +                        staging_buffer.resize(size); +                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); +                        return ConstBufferUpload(staging_buffer.data(), size); +                    }                  } else { -                    return StreamBufferUpload(host_ptr, size, alignment); +                    if (memory_manager.IsGranularRange(gpu_addr, size)) { +                        const auto host_ptr = memory_manager.GetPointer(gpu_addr); +                        return StreamBufferUpload(host_ptr, size, alignment); +                    } else { +                        staging_buffer.resize(size); +                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); +                        return StreamBufferUpload(staging_buffer.data(), size, alignment); +                    }                  }              }          } -        auto block = GetBlock(cache_addr, size); -        auto map = MapAddress(block, gpu_addr, cache_addr, size); +        auto block = GetBlock(cpu_addr, size); +        auto map = MapAddress(block, gpu_addr, cpu_addr, size);          if (is_written) {              map->MarkAsModified(true, GetModifiedTicks());              if (!map->IsWritten()) { @@ -71,7 +89,7 @@ public:              }          } -        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); +        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));          return {ToHandle(block), offset};      } @@ -112,7 +130,7 @@ public:      }      /// Write any cached resources overlapping the specified region back to memory -    void FlushRegion(CacheAddr addr, std::size_t size) { +    void FlushRegion(VAddr addr, std::size_t size) {          std::lock_guard lock{mutex};          std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -127,7 +145,7 @@ public:      }      /// Mark the specified region as being invalidated -    void InvalidateRegion(CacheAddr addr, u64 size) { +    void InvalidateRegion(VAddr addr, u64 size) {          std::lock_guard lock{mutex};          std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -152,7 +170,7 @@ protected:      virtual void WriteBarrier() = 0; -    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; +    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;      virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,                                   const u8* data) = 0; @@ -169,20 +187,17 @@ protected:      /// Register an object into the cache      void Register(const MapInterval& new_map, bool inherit_written = false) { -        const CacheAddr 
cache_ptr = new_map->GetStart(); -        const std::optional<VAddr> cpu_addr = -            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); -        if (!cache_ptr || !cpu_addr) { +        const VAddr cpu_addr = new_map->GetStart(); +        if (!cpu_addr) {              LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",                           new_map->GetGpuAddress());              return;          }          const std::size_t size = new_map->GetEnd() - new_map->GetStart(); -        new_map->SetCpuAddress(*cpu_addr);          new_map->MarkAsRegistered(true);          const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};          mapped_addresses.insert({interval, new_map}); -        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); +        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);          if (inherit_written) {              MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);              new_map->MarkAsWritten(true); @@ -192,7 +207,7 @@ protected:      /// Unregisters an object from the cache      void Unregister(MapInterval& map) {          const std::size_t size = map->GetEnd() - map->GetStart(); -        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); +        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);          map->MarkAsRegistered(false);          if (map->IsWritten()) {              UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); @@ -202,32 +217,39 @@ protected:      }  private: -    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { +    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {          return std::make_shared<MapIntervalBase>(start, end, gpu_addr);      } -    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, -                           const CacheAddr cache_addr, const std::size_t size) { +    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, +                           const std::size_t size) { -        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); +        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);          if (overlaps.empty()) { -            const CacheAddr cache_addr_end = cache_addr + size; -            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); -            u8* host_ptr = FromCacheAddr(cache_addr); -            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); +            auto& memory_manager = system.GPU().MemoryManager(); +            const VAddr cpu_addr_end = cpu_addr + size; +            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); +            if (memory_manager.IsGranularRange(gpu_addr, size)) { +                u8* host_ptr = memory_manager.GetPointer(gpu_addr); +                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); +            } else { +                staging_buffer.resize(size); +                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); +                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); +            }              Register(new_map);              return new_map;          } -        const CacheAddr cache_addr_end = cache_addr + size; +        const VAddr cpu_addr_end = cpu_addr + size;          if (overlaps.size() == 1) {              
MapInterval& current_map = overlaps[0]; -            if (current_map->IsInside(cache_addr, cache_addr_end)) { +            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {                  return current_map;              }          } -        CacheAddr new_start = cache_addr; -        CacheAddr new_end = cache_addr_end; +        VAddr new_start = cpu_addr; +        VAddr new_end = cpu_addr_end;          bool write_inheritance = false;          bool modified_inheritance = false;          // Calculate new buffer parameters @@ -237,7 +259,7 @@ private:              write_inheritance |= overlap->IsWritten();              modified_inheritance |= overlap->IsModified();          } -        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; +        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;          for (auto& overlap : overlaps) {              Unregister(overlap);          } @@ -250,7 +272,7 @@ private:          return new_map;      } -    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, +    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,                       std::vector<MapInterval>& overlaps) {          const IntervalType base_interval{start, end};          IntervalSet interval_set{}; @@ -262,13 +284,15 @@ private:          for (auto& interval : interval_set) {              std::size_t size = interval.upper() - interval.lower();              if (size > 0) { -                u8* host_ptr = FromCacheAddr(interval.lower()); -                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); +                staging_buffer.resize(size); +                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); +                UploadBlockData(block, block->GetOffset(interval.lower()), size, +                                staging_buffer.data());              }          }      } -    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { +    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {          if (size == 0) {              return {};          } @@ -290,8 +314,9 @@ private:      void FlushMap(MapInterval map) {          std::size_t size = map->GetEnd() - map->GetStart();          TBuffer block = blocks[map->GetStart() >> block_page_bits]; -        u8* host_ptr = FromCacheAddr(map->GetStart()); -        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); +        staging_buffer.resize(size); +        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); +        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);          map->MarkAsModified(false, 0);      } @@ -316,14 +341,14 @@ private:      TBuffer EnlargeBlock(TBuffer buffer) {          const std::size_t old_size = buffer->GetSize();          const std::size_t new_size = old_size + block_page_size; -        const CacheAddr cache_addr = buffer->GetCacheAddr(); -        TBuffer new_buffer = CreateBlock(cache_addr, new_size); +        const VAddr cpu_addr = buffer->GetCpuAddr(); +        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);          CopyBlock(buffer, new_buffer, 0, 0, old_size);          buffer->SetEpoch(epoch);          pending_destruction.push_back(buffer); -        const CacheAddr cache_addr_end = cache_addr + new_size - 1; -        u64 page_start = cache_addr >> block_page_bits; -        const u64 page_end = cache_addr_end >> block_page_bits; +        const VAddr cpu_addr_end = 
cpu_addr + new_size - 1; +        u64 page_start = cpu_addr >> block_page_bits; +        const u64 page_end = cpu_addr_end >> block_page_bits;          while (page_start <= page_end) {              blocks[page_start] = new_buffer;              ++page_start; @@ -334,9 +359,9 @@ private:      TBuffer MergeBlocks(TBuffer first, TBuffer second) {          const std::size_t size_1 = first->GetSize();          const std::size_t size_2 = second->GetSize(); -        const CacheAddr first_addr = first->GetCacheAddr(); -        const CacheAddr second_addr = second->GetCacheAddr(); -        const CacheAddr new_addr = std::min(first_addr, second_addr); +        const VAddr first_addr = first->GetCpuAddr(); +        const VAddr second_addr = second->GetCpuAddr(); +        const VAddr new_addr = std::min(first_addr, second_addr);          const std::size_t new_size = size_1 + size_2;          TBuffer new_buffer = CreateBlock(new_addr, new_size);          CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); @@ -345,9 +370,9 @@ private:          second->SetEpoch(epoch);          pending_destruction.push_back(first);          pending_destruction.push_back(second); -        const CacheAddr cache_addr_end = new_addr + new_size - 1; +        const VAddr cpu_addr_end = new_addr + new_size - 1;          u64 page_start = new_addr >> block_page_bits; -        const u64 page_end = cache_addr_end >> block_page_bits; +        const u64 page_end = cpu_addr_end >> block_page_bits;          while (page_start <= page_end) {              blocks[page_start] = new_buffer;              ++page_start; @@ -355,18 +380,18 @@ private:          return new_buffer;      } -    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { +    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {          TBuffer found{}; -        const CacheAddr cache_addr_end = cache_addr + size - 1; -        u64 page_start = cache_addr >> block_page_bits; -        const u64 page_end = cache_addr_end >> block_page_bits; +        const VAddr cpu_addr_end = cpu_addr + size - 1; +        u64 page_start = cpu_addr >> block_page_bits; +        const u64 page_end = cpu_addr_end >> block_page_bits;          while (page_start <= page_end) {              auto it = blocks.find(page_start);              if (it == blocks.end()) {                  if (found) {                      found = EnlargeBlock(found);                  } else { -                    const CacheAddr start_addr = (page_start << block_page_bits); +                    const VAddr start_addr = (page_start << block_page_bits);                      found = CreateBlock(start_addr, block_page_size);                      blocks[page_start] = found;                  } @@ -386,7 +411,7 @@ private:          return found;      } -    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { +    void MarkRegionAsWritten(const VAddr start, const VAddr end) {          u64 page_start = start >> write_page_bit;          const u64 page_end = end >> write_page_bit;          while (page_start <= page_end) { @@ -400,7 +425,7 @@ private:          }      } -    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { +    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {          u64 page_start = start >> write_page_bit;          const u64 page_end = end >> write_page_bit;          while (page_start <= page_end) { @@ -416,7 +441,7 @@ private:          }      } -    bool IsRegionWritten(const CacheAddr start, const CacheAddr 
end) const { +    bool IsRegionWritten(const VAddr start, const VAddr end) const {          u64 page_start = start >> write_page_bit;          const u64 page_end = end >> write_page_bit;          while (page_start <= page_end) { @@ -440,8 +465,8 @@ private:      u64 buffer_offset = 0;      u64 buffer_offset_base = 0; -    using IntervalSet = boost::icl::interval_set<CacheAddr>; -    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; +    using IntervalSet = boost::icl::interval_set<VAddr>; +    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;      using IntervalType = typename IntervalCache::interval_type;      IntervalCache mapped_addresses; @@ -456,6 +481,8 @@ private:      u64 epoch = 0;      u64 modified_ticks = 0; +    std::vector<u8> staging_buffer; +      std::recursive_mutex mutex;  }; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 3a104d5cd..b0956029d 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -11,7 +11,7 @@ namespace VideoCommon {  class MapIntervalBase {  public: -    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) +    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)          : start{start}, end{end}, gpu_addr{gpu_addr} {}      void SetCpuAddress(VAddr new_cpu_addr) { @@ -26,7 +26,7 @@ public:          return gpu_addr;      } -    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { +    bool IsInside(const VAddr other_start, const VAddr other_end) const {          return (start <= other_start && other_end <= end);      } @@ -46,11 +46,11 @@ public:          return is_registered;      } -    CacheAddr GetStart() const { +    VAddr GetStart() const {          return start;      } -    CacheAddr GetEnd() const { +    VAddr GetEnd() const {          return end;      } @@ -76,8 +76,8 @@ public:      }  private: -    CacheAddr start; -    CacheAddr end; +    VAddr start; +    VAddr end;      GPUVAddr gpu_addr;      VAddr cpu_addr{};      bool is_written{}; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ced9d7e28..1a2d747be 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -270,13 +270,13 @@ public:      virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;      /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory -    virtual void FlushRegion(CacheAddr addr, u64 size) = 0; +    virtual void FlushRegion(VAddr addr, u64 size) = 0;      /// Notify rasterizer that any caches of the specified region should be invalidated -    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; +    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;      /// Notify rasterizer that any caches of the specified region should be flushed and invalidated -    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; +    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;  protected:      virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 925be8d7b..cc434faf7 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {      gpu_thread.SwapBuffers(framebuffer);  } -void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) 
{ +void GPUAsynch::FlushRegion(VAddr addr, u64 size) {      gpu_thread.FlushRegion(addr, size);  } -void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {      gpu_thread.InvalidateRegion(addr, size);  } -void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {      gpu_thread.FlushAndInvalidateRegion(addr, size);  } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 265c62758..03fd0eef0 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -27,9 +27,9 @@ public:      void Start() override;      void PushGPUEntries(Tegra::CommandList&& entries) override;      void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; -    void FlushRegion(CacheAddr addr, u64 size) override; -    void InvalidateRegion(CacheAddr addr, u64 size) override; -    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +    void FlushRegion(VAddr addr, u64 size) override; +    void InvalidateRegion(VAddr addr, u64 size) override; +    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void WaitIdle() const override;  protected: diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index bd5278a5c..6f38a672a 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {      renderer->SwapBuffers(framebuffer);  } -void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { +void GPUSynch::FlushRegion(VAddr addr, u64 size) {      renderer->Rasterizer().FlushRegion(addr, size);  } -void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {      renderer->Rasterizer().InvalidateRegion(addr, size);  } -void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {      renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);  } diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 866a94c8c..4a6e9a01d 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -26,9 +26,9 @@ public:      void Start() override;      void PushGPUEntries(Tegra::CommandList&& entries) override;      void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; -    void FlushRegion(CacheAddr addr, u64 size) override; -    void InvalidateRegion(CacheAddr addr, u64 size) override; -    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +    void FlushRegion(VAddr addr, u64 size) override; +    void InvalidateRegion(VAddr addr, u64 size) override; +    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void WaitIdle() const override {}  protected: diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 270c7ae0d..10cda686b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {      PushCommand(SwapBuffersCommand(framebuffer ? 
std::make_optional(*framebuffer) : std::nullopt));  } -void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushRegion(VAddr addr, u64 size) {      PushCommand(FlushRegionCommand(addr, size));  } -void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {      system.Renderer().Rasterizer().InvalidateRegion(addr, size);  } -void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {      // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important      InvalidateRegion(addr, size);  } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index be36c580e..cd74ad330 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -47,26 +47,26 @@ struct SwapBuffersCommand final {  /// Command to signal to the GPU thread to flush a region  struct FlushRegionCommand final { -    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} +    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} -    CacheAddr addr; +    VAddr addr;      u64 size;  };  /// Command to signal to the GPU thread to invalidate a region  struct InvalidateRegionCommand final { -    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} +    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} -    CacheAddr addr; +    VAddr addr;      u64 size;  };  /// Command to signal to the GPU thread to flush and invalidate a region  struct FlushAndInvalidateRegionCommand final { -    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) +    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)          : addr{addr}, size{size} {} -    CacheAddr addr; +    VAddr addr;      u64 size;  }; @@ -111,13 +111,13 @@ public:      void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);      /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory -    void FlushRegion(CacheAddr addr, u64 size); +    void FlushRegion(VAddr addr, u64 size);      /// Notify rasterizer that any caches of the specified region should be invalidated -    void InvalidateRegion(CacheAddr addr, u64 size); +    void InvalidateRegion(VAddr addr, u64 size);      /// Notify rasterizer that any caches of the specified region should be flushed and invalidated -    void FlushAndInvalidateRegion(CacheAddr addr, u64 size); +    void FlushAndInvalidateRegion(VAddr addr, u64 size);      // Wait until the gpu thread is idle.      void WaitIdle() const; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f5d33f27a..a3389d0d2 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {      ASSERT((gpu_addr & page_mask) == 0);      const u64 aligned_size{Common::AlignUp(size, page_size)}; -    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};      const auto cpu_addr = GpuToCpuAddress(gpu_addr);      ASSERT(cpu_addr);      // Flush and invalidate through the GPU interface, to be asynchronous if possible. 
-    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); +    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);      UnmapRange(gpu_addr, aligned_size);      ASSERT(system.CurrentProcess() @@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {          return {};      } -    const u8* page_pointer{page_table.pointers[addr >> page_bits]}; +    const u8* page_pointer{GetPointer(addr)};      if (page_pointer) {          // NOTE: Avoid adding any extra logic to this fast-path block          T value; -        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); +        std::memcpy(&value, page_pointer, sizeof(T));          return value;      } @@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {          return;      } -    u8* page_pointer{page_table.pointers[addr >> page_bits]}; +    u8* page_pointer{GetPointer(addr)};      if (page_pointer) {          // NOTE: Avoid adding any extra logic to this fast-path block -        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); +        std::memcpy(page_pointer, &data, sizeof(T));          return;      } @@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {          return {};      } -    u8* const page_pointer{page_table.pointers[addr >> page_bits]}; -    if (page_pointer != nullptr) { -        return page_pointer + (addr & page_mask); +    auto& memory = system.Memory(); + +    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + +    if (page_addr != 0) { +        return memory.GetPointer(page_addr + (addr & page_mask));      }      LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {          return {};      } -    const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; -    if (page_pointer != nullptr) { -        return page_pointer + (addr & page_mask); +    const auto& memory = system.Memory(); + +    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + +    if (page_addr != 0) { +        return memory.GetPointer(page_addr + (addr & page_mask));      }      LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s      std::size_t page_index{src_addr >> page_bits};      std::size_t page_offset{src_addr & page_mask}; +    auto& memory = system.Memory(); +      while (remaining_size > 0) {          const std::size_t copy_amount{              std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};          switch (page_table.attributes[page_index]) {          case Common::PageType::Memory: { -            const u8* src_ptr{page_table.pointers[page_index] + page_offset}; +            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};              // Flush must happen on the rasterizer interface, such that memory is always synchronous              // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. 
-            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); -            std::memcpy(dest_buffer, src_ptr, copy_amount); +            rasterizer.FlushRegion(src_addr, copy_amount); +            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);              break;          }          default: @@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,      std::size_t page_index{src_addr >> page_bits};      std::size_t page_offset{src_addr & page_mask}; +    auto& memory = system.Memory(); +      while (remaining_size > 0) {          const std::size_t copy_amount{              std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};          const u8* page_pointer = page_table.pointers[page_index];          if (page_pointer) { -            const u8* src_ptr{page_pointer + page_offset}; -            std::memcpy(dest_buffer, src_ptr, copy_amount); +            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; +            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);          } else {              std::memset(dest_buffer, 0, copy_amount);          } @@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const      std::size_t page_index{dest_addr >> page_bits};      std::size_t page_offset{dest_addr & page_mask}; +    auto& memory = system.Memory(); +      while (remaining_size > 0) {          const std::size_t copy_amount{              std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};          switch (page_table.attributes[page_index]) {          case Common::PageType::Memory: { -            u8* dest_ptr{page_table.pointers[page_index] + page_offset}; +            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};              // Invalidate must happen on the rasterizer interface, such that memory is always              // synchronous when it is written (even when in asynchronous GPU mode). 
-            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); -            std::memcpy(dest_ptr, src_buffer, copy_amount); +            rasterizer.InvalidateRegion(dest_addr, copy_amount); +            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);              break;          }          default: @@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,      std::size_t page_index{dest_addr >> page_bits};      std::size_t page_offset{dest_addr & page_mask}; +    auto& memory = system.Memory(); +      while (remaining_size > 0) {          const std::size_t copy_amount{              std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};          u8* page_pointer = page_table.pointers[page_index];          if (page_pointer) { -            u8* dest_ptr{page_pointer + page_offset}; -            std::memcpy(dest_ptr, src_buffer, copy_amount); +            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; +            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);          }          page_index++;          page_offset = 0; @@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,  }  void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { -    std::size_t remaining_size{size}; -    std::size_t page_index{src_addr >> page_bits}; -    std::size_t page_offset{src_addr & page_mask}; - -    while (remaining_size > 0) { -        const std::size_t copy_amount{ -            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - -        switch (page_table.attributes[page_index]) { -        case Common::PageType::Memory: { -            // Flush must happen on the rasterizer interface, such that memory is always synchronous -            // when it is copied (even when in asynchronous GPU mode). 
-            const u8* src_ptr{page_table.pointers[page_index] + page_offset}; -            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); -            WriteBlock(dest_addr, src_ptr, copy_amount); -            break; -        } -        default: -            UNREACHABLE(); -        } - -        page_index++; -        page_offset = 0; -        dest_addr += static_cast<VAddr>(copy_amount); -        src_addr += static_cast<VAddr>(copy_amount); -        remaining_size -= copy_amount; -    } +    std::vector<u8> tmp_buffer(size); +    ReadBlock(src_addr, tmp_buffer.data(), size); +    WriteBlock(dest_addr, tmp_buffer.data(), size);  }  void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { @@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const      WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);  } +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { +    const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; +    const std::size_t page = (addr & Memory::PAGE_MASK) + size; +    return page <= Memory::PAGE_SIZE; +} +  void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,                               VAddr backing_addr) {      LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 073bdb491..0d9468535 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -97,6 +97,11 @@ public:      void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);      void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); +    /** +     * IsGranularRange checks if a gpu region can be simply read with a pointer +     */ +    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); +  private:      using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;      using VMAHandle = VMAMap::const_iterator; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index e66054ed0..5ea2b01f2 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -98,12 +98,12 @@ public:                                                        static_cast<QueryCache&>(*this),                                                        VideoCore::QueryType::SamplesPassed}}} {} -    void InvalidateRegion(CacheAddr addr, std::size_t size) { +    void InvalidateRegion(VAddr addr, std::size_t size) {          std::unique_lock lock{mutex};          FlushAndRemoveRegion(addr, size);      } -    void FlushRegion(CacheAddr addr, std::size_t size) { +    void FlushRegion(VAddr addr, std::size_t size) {          std::unique_lock lock{mutex};          FlushAndRemoveRegion(addr, size);      } @@ -117,14 +117,16 @@ public:      void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {          std::unique_lock lock{mutex};          auto& memory_manager = system.GPU().MemoryManager(); -        const auto host_ptr = memory_manager.GetPointer(gpu_addr); +        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); +        ASSERT(cpu_addr_opt); +        VAddr cpu_addr = *cpu_addr_opt; -        CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); +        CachedQuery* query = TryGet(cpu_addr);          if (!query) { -            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); -     
       ASSERT_OR_EXECUTE(cpu_addr, return;); +            ASSERT_OR_EXECUTE(cpu_addr_opt, return;); +            const auto host_ptr = memory_manager.GetPointer(gpu_addr); -            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); +            query = Register(type, cpu_addr, host_ptr, timestamp.has_value());          }          query->BindCounter(Stream(type).Current(), timestamp); @@ -173,11 +175,11 @@ protected:  private:      /// Flushes a memory range to guest memory and removes it from the cache. -    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { +    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {          const u64 addr_begin = static_cast<u64>(addr);          const u64 addr_end = addr_begin + static_cast<u64>(size);          const auto in_range = [addr_begin, addr_end](CachedQuery& query) { -            const u64 cache_begin = query.GetCacheAddr(); +            const u64 cache_begin = query.GetCpuAddr();              const u64 cache_end = cache_begin + query.SizeInBytes();              return cache_begin < addr_end && addr_begin < cache_end;          }; @@ -193,7 +195,7 @@ private:                  if (!in_range(query)) {                      continue;                  } -                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); +                rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);                  query.Flush();              }              contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), @@ -204,22 +206,21 @@ private:      /// Registers the passed parameters as cached and returns a pointer to the stored cached query.      CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {          rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); -        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; +        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;          return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,                                                    host_ptr);      }      /// Tries to a get a cached query. Returns nullptr on failure. -    CachedQuery* TryGet(CacheAddr addr) { +    CachedQuery* TryGet(VAddr addr) {          const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;          const auto it = cached_queries.find(page);          if (it == std::end(cached_queries)) {              return nullptr;          }          auto& contents = it->second; -        const auto found = -            std::find_if(std::begin(contents), std::end(contents), -                         [addr](auto& query) { return query.GetCacheAddr() == addr; }); +        const auto found = std::find_if(std::begin(contents), std::end(contents), +                                        [addr](auto& query) { return query.GetCpuAddr() == addr; });          return found != std::end(contents) ? 
&*found : nullptr;      } @@ -323,14 +324,10 @@ public:          timestamp = timestamp_;      } -    VAddr CpuAddr() const noexcept { +    VAddr GetCpuAddr() const noexcept {          return cpu_addr;      } -    CacheAddr GetCacheAddr() const noexcept { -        return ToCacheAddr(host_ptr); -    } -      u64 SizeInBytes() const noexcept {          return SizeInBytes(timestamp.has_value());      } diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6de1597a2..22987751e 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -18,22 +18,14 @@  class RasterizerCacheObject {  public: -    explicit RasterizerCacheObject(const u8* host_ptr) -        : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} +    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}      virtual ~RasterizerCacheObject(); -    CacheAddr GetCacheAddr() const { -        return cache_addr; +    VAddr GetCpuAddr() const { +        return cpu_addr;      } -    const u8* GetHostPtr() const { -        return host_ptr; -    } - -    /// Gets the address of the shader in guest memory, required for cache management -    virtual VAddr GetCpuAddr() const = 0; -      /// Gets the size of the shader in guest memory, required for cache management      virtual std::size_t GetSizeInBytes() const = 0; @@ -68,8 +60,7 @@ private:      bool is_registered{};      ///< Whether the object is currently registered with the cache      bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)      u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing -    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region -    CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space +    VAddr cpu_addr{};          ///< Cpu address memory, unique from emulated virtual address space  };  template <class T> @@ -80,7 +71,7 @@ public:      explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}      /// Write any cached resources overlapping the specified region back to memory -    void FlushRegion(CacheAddr addr, std::size_t size) { +    void FlushRegion(VAddr addr, std::size_t size) {          std::lock_guard lock{mutex};          const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -90,7 +81,7 @@ public:      }      /// Mark the specified region as being invalidated -    void InvalidateRegion(CacheAddr addr, u64 size) { +    void InvalidateRegion(VAddr addr, u64 size) {          std::lock_guard lock{mutex};          const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -114,27 +105,20 @@ public:  protected:      /// Tries to get an object from the cache with the specified cache address -    T TryGet(CacheAddr addr) const { +    T TryGet(VAddr addr) const {          const auto iter = map_cache.find(addr);          if (iter != map_cache.end())              return iter->second;          return nullptr;      } -    T TryGet(const void* addr) const { -        const auto iter = map_cache.find(ToCacheAddr(addr)); -        if (iter != map_cache.end()) -            return iter->second; -        return nullptr; -    } -      /// Register an object into the cache      virtual void Register(const T& object) {          std::lock_guard lock{mutex};          object->SetIsRegistered(true);          interval_cache.add({GetInterval(object), ObjectSet{object}}); -        
map_cache.insert({object->GetCacheAddr(), object}); +        map_cache.insert({object->GetCpuAddr(), object});          rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);      } @@ -144,7 +128,7 @@ protected:          object->SetIsRegistered(false);          rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); -        const CacheAddr addr = object->GetCacheAddr(); +        const VAddr addr = object->GetCpuAddr();          interval_cache.subtract({GetInterval(object), ObjectSet{object}});          map_cache.erase(addr);      } @@ -173,7 +157,7 @@ protected:  private:      /// Returns a list of cached objects from the specified memory region, ordered by access time -    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { +    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {          if (size == 0) {              return {};          } @@ -197,13 +181,13 @@ private:      }      using ObjectSet = std::set<T>; -    using ObjectCache = std::unordered_map<CacheAddr, T>; -    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; +    using ObjectCache = std::unordered_map<VAddr, T>; +    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;      using ObjectInterval = typename IntervalCache::interval_type;      static auto GetInterval(const T& object) { -        return ObjectInterval::right_open(object->GetCacheAddr(), -                                          object->GetCacheAddr() + object->GetSizeInBytes()); +        return ObjectInterval::right_open(object->GetCpuAddr(), +                                          object->GetCpuAddr() + object->GetSizeInBytes());      }      ObjectCache map_cache; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1a68e3caa..8ae5b9c4e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -53,14 +53,14 @@ public:      virtual void FlushAll() = 0;      /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory -    virtual void FlushRegion(CacheAddr addr, u64 size) = 0; +    virtual void FlushRegion(VAddr addr, u64 size) = 0;      /// Notify rasterizer that any caches of the specified region should be invalidated -    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; +    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;      /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory      /// and invalidated -    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; +    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;      /// Notify the rasterizer to send all written commands to the host GPU.      
virtual void FlushCommands() = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0375fca17..4eb37a96c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;  MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) -    : VideoCommon::BufferBlock{cache_addr, size} { +CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) +    : VideoCommon::BufferBlock{cpu_addr, size} {      gl_buffer.Create();      glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);  } @@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {      glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));  } -Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { -    return std::make_shared<CachedBufferBlock>(cache_addr, size); +Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { +    return std::make_shared<CachedBufferBlock>(cpu_addr, size);  }  void OGLBufferCache::WriteBarrier() { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c7145443..d94a11252 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf  class CachedBufferBlock : public VideoCommon::BufferBlock {  public: -    explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); +    explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);      ~CachedBufferBlock();      const GLuint* GetHandle() const { @@ -55,7 +55,7 @@ public:      }  protected: -    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; +    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;      void WriteBarrier() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 346feeb2f..368f399df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,  void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    if (!addr || !size) { +    if (addr == 0 || size == 0) {          return;      }      texture_cache.FlushRegion(addr, size); @@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {      query_cache.FlushRegion(addr, size);  } -void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    if (!addr || !size) { +    if (addr == 0 || size == 0) {          return;      }      texture_cache.InvalidateRegion(addr, size); @@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {      query_cache.InvalidateRegion(addr, size);  } -void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void 
RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {      if (Settings::values.use_accurate_gpu_emulation) {          FlushRegion(addr, size);      } @@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    const auto surface{ -        texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; +    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};      if (!surface) {          return {};      } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2d3be2437..212dad852 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -65,9 +65,9 @@ public:      void ResetCounter(VideoCore::QueryType type) override;      void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;      void FlushAll() override; -    void FlushRegion(CacheAddr addr, u64 size) override; -    void InvalidateRegion(CacheAddr addr, u64 size) override; -    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +    void FlushRegion(VAddr addr, u64 size) override; +    void InvalidateRegion(VAddr addr, u64 size) override; +    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void FlushCommands() override;      void TickFrame() override;      bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 046ee55a5..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {  } // Anonymous namespace -CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, +CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,                             std::shared_ptr<VideoCommon::Shader::Registry> registry,                             ShaderEntries entries, std::shared_ptr<OGLProgram> program) -    : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, -      cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} +    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, +      size_in_bytes{size_in_bytes}, program{std::move(program)} {}  CachedShader::~CachedShader() = default; @@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,      entry.bindless_samplers = registry->GetBindlessSamplers();      params.disk_cache.SaveEntry(std::move(entry)); -    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, -                                                          size_in_bytes, std::move(registry), -                                                          MakeEntries(ir), std::move(program))); +    return std::shared_ptr<CachedShader>(new CachedShader( +        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));  }  Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { @@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog      entry.bindless_samplers = 
registry->GetBindlessSamplers();      params.disk_cache.SaveEntry(std::move(entry)); -    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, -                                                          size_in_bytes, std::move(registry), -                                                          MakeEntries(ir), std::move(program))); +    return std::shared_ptr<CachedShader>(new CachedShader( +        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));  }  Shader CachedShader::CreateFromCache(const ShaderParameters& params,                                       const PrecompiledShader& precompiled_shader,                                       std::size_t size_in_bytes) { -    return std::shared_ptr<CachedShader>(new CachedShader( -        params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, -        precompiled_shader.entries, precompiled_shader.program)); +    return std::shared_ptr<CachedShader>( +        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, +                         precompiled_shader.entries, precompiled_shader.program));  }  ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {      const GPUVAddr address{GetShaderAddress(system, program)};      // Look up shader in the cache based on address -    const auto host_ptr{memory_manager.GetPointer(address)}; -    Shader shader{TryGet(host_ptr)}; +    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; +    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};      if (shader) {          return last_shaders[static_cast<std::size_t>(program)] = shader;      } +    const auto host_ptr{memory_manager.GetPointer(address)}; +      // No shader found - create a new one      ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};      ProgramCode code_b; @@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {      const auto unique_identifier = GetUniqueIdentifier(          GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); -    const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; -    const ShaderParameters params{system,   disk_cache, device, -                                  cpu_addr, host_ptr,   unique_identifier}; + +    const ShaderParameters params{system,    disk_cache, device, +                                  *cpu_addr, host_ptr,   unique_identifier};      const auto found = runtime_cache.find(unique_identifier);      if (found == runtime_cache.end()) { @@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {  Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {      auto& memory_manager{system.GPU().MemoryManager()}; -    const auto host_ptr{memory_manager.GetPointer(code_addr)}; -    auto kernel = TryGet(host_ptr); +    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; + +    auto kernel = cpu_addr ? 
TryGet(*cpu_addr) : nullptr;      if (kernel) {          return kernel;      } +    const auto host_ptr{memory_manager.GetPointer(code_addr)};      // No kernel found, create a new one      auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};      const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; -    const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; -    const ShaderParameters params{system,   disk_cache, device, -                                  cpu_addr, host_ptr,   unique_identifier}; + +    const ShaderParameters params{system,    disk_cache, device, +                                  *cpu_addr, host_ptr,   unique_identifier};      const auto found = runtime_cache.find(unique_identifier);      if (found == runtime_cache.end()) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 4935019fc..c836df5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,11 +65,6 @@ public:      /// Gets the GL program handle for the shader      GLuint GetHandle() const; -    /// Returns the guest CPU address of the shader -    VAddr GetCpuAddr() const override { -        return cpu_addr; -    } -      /// Returns the size in bytes of the shader      std::size_t GetSizeInBytes() const override {          return size_in_bytes; @@ -90,13 +85,12 @@ public:                                    std::size_t size_in_bytes);  private: -    explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, +    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,                            std::shared_ptr<VideoCommon::Shader::Registry> registry,                            ShaderEntries entries, std::shared_ptr<OGLProgram> program);      std::shared_ptr<VideoCommon::Shader::Registry> registry;      ShaderEntries entries; -    VAddr cpu_addr = 0;      std::size_t size_in_bytes = 0;      std::shared_ptr<OGLProgram> program;  }; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1ba544943..326d74f29 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {  } // Anonymous namespace  CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, -                                     CacheAddr cache_addr, std::size_t size) -    : VideoCommon::BufferBlock{cache_addr, size} { +                                     VAddr cpu_addr, std::size_t size) +    : VideoCommon::BufferBlock{cpu_addr, size} {      const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),                                           BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |                                               vk::BufferUsageFlagBits::eTransferDst, @@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S  VKBufferCache::~VKBufferCache() = default; -Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { -    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); +Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { +    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);  }  const vk::Buffer* 
VKBufferCache::ToHandle(const Buffer& buffer) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3f38eed0c..508214618 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -30,7 +30,7 @@ class VKScheduler;  class CachedBufferBlock final : public VideoCommon::BufferBlock {  public:      explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, -                               CacheAddr cache_addr, std::size_t size); +                               VAddr cpu_addr, std::size_t size);      ~CachedBufferBlock();      const vk::Buffer* GetHandle() const { @@ -55,7 +55,7 @@ public:  protected:      void WriteBarrier() override {} -    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; +    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;      const vk::Buffer* ToHandle(const Buffer& buffer) override; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 557b9d662..c2a426aeb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,  } // Anonymous namespace  CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, -                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, -                           ProgramCode program_code, u32 main_offset) -    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, -      program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, -      shader_ir{this->program_code, main_offset, compiler_settings, registry}, +                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, +                           u32 main_offset) +    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, +      registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, +                                                           compiler_settings, registry},        entries{GenerateShaderEntries(shader_ir)} {}  CachedShader::~CachedShader() = default; @@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {          auto& memory_manager{system.GPU().MemoryManager()};          const GPUVAddr program_addr{GetShaderAddress(system, program)}; -        const auto host_ptr{memory_manager.GetPointer(program_addr)}; -        auto shader = TryGet(host_ptr); +        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); +        ASSERT(cpu_addr); +        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;          if (!shader) { +            const auto host_ptr{memory_manager.GetPointer(program_addr)}; +              // No shader found - create a new one              constexpr u32 stage_offset = 10;              const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 
0 : index - 1);              auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); -            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); -            ASSERT(cpu_addr); -              shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, -                                                    host_ptr, std::move(code), stage_offset); +                                                    std::move(code), stage_offset);              Register(shader);          }          shaders[index] = std::move(shader); @@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach      auto& memory_manager = system.GPU().MemoryManager();      const auto program_addr = key.shader; -    const auto host_ptr = memory_manager.GetPointer(program_addr); -    auto shader = TryGet(host_ptr); +    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); +    ASSERT(cpu_addr); + +    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;      if (!shader) {          // No shader found - create a new one -        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); -        ASSERT(cpu_addr); +        const auto host_ptr = memory_manager.GetPointer(program_addr);          auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);          constexpr u32 kernel_main_offset = 0;          shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, -                                                program_addr, *cpu_addr, host_ptr, std::move(code), +                                                program_addr, *cpu_addr, std::move(code),                                                  kernel_main_offset);          Register(shader);      } @@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {          }          const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); -        const auto host_ptr = memory_manager.GetPointer(gpu_addr); -        const auto shader = TryGet(host_ptr); +        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); +        ASSERT(cpu_addr); +        const auto shader = TryGet(*cpu_addr);          ASSERT(shader);          const std::size_t stage = index == 0 ? 
0 : index - 1; // Stage indices are 0 - 5 diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c4c112290..27c01732f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -113,17 +113,13 @@ namespace Vulkan {  class CachedShader final : public RasterizerCacheObject {  public:      explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, -                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); +                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);      ~CachedShader();      GPUVAddr GetGpuAddr() const {          return gpu_addr;      } -    VAddr GetCpuAddr() const override { -        return cpu_addr; -    } -      std::size_t GetSizeInBytes() const override {          return program_code.size() * sizeof(u64);      } @@ -149,7 +145,6 @@ private:                                                                   Tegra::Engines::ShaderType stage);      GPUVAddr gpu_addr{}; -    VAddr cpu_addr{};      ProgramCode program_code;      VideoCommon::Shader::Registry registry;      VideoCommon::Shader::ShaderIR shader_ir; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 58c69b786..0a2ea4fd4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,  void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { +    if (addr == 0 || size == 0) { +        return; +    }      texture_cache.FlushRegion(addr, size);      buffer_cache.FlushRegion(addr, size);      query_cache.FlushRegion(addr, size);  } -void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { +    if (addr == 0 || size == 0) { +        return; +    }      texture_cache.InvalidateRegion(addr, size);      pipeline_cache.InvalidateRegion(addr, size);      buffer_cache.InvalidateRegion(addr, size);      query_cache.InvalidateRegion(addr, size);  } -void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {      FlushRegion(addr, size);      InvalidateRegion(addr, size);  } @@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,          return false;      } -    const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; -    const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; +    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};      if (!surface) {          return false;      } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3185868e9..f642dde76 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -118,9 +118,9 @@ public:      void ResetCounter(VideoCore::QueryType type) override;      void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;      void FlushAll() override; -    void FlushRegion(CacheAddr addr, u64 size) override; -    void 
InvalidateRegion(CacheAddr addr, u64 size) override; -    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +    void FlushRegion(VAddr addr, u64 size) override; +    void InvalidateRegion(VAddr addr, u64 size) override; +    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void FlushCommands() override;      void TickFrame() override;      bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 6fe815135..7af0e792c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,      MICROPROFILE_SCOPE(GPU_Load_Texture);      auto& staging_buffer = staging_cache.GetBuffer(0);      u8* host_ptr; -    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); - -    // Handle continuouty -    if (is_continuous) { -        // Use physical memory directly -        host_ptr = memory_manager.GetPointer(gpu_addr); -        if (!host_ptr) { -            return; -        } -    } else { -        // Use an extra temporal buffer -        auto& tmp_buffer = staging_cache.GetBuffer(1); -        tmp_buffer.resize(guest_memory_size); -        host_ptr = tmp_buffer.data(); -        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -    } +    // Use an extra temporal buffer +    auto& tmp_buffer = staging_cache.GetBuffer(1); +    tmp_buffer.resize(guest_memory_size); +    host_ptr = tmp_buffer.data(); +    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);      if (params.is_tiled) {          ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", @@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,      auto& staging_buffer = staging_cache.GetBuffer(0);      u8* host_ptr; -    // Handle continuouty -    if (is_continuous) { -        // Use physical memory directly -        host_ptr = memory_manager.GetPointer(gpu_addr); -        if (!host_ptr) { -            return; -        } -    } else { -        // Use an extra temporal buffer -        auto& tmp_buffer = staging_cache.GetBuffer(1); -        tmp_buffer.resize(guest_memory_size); -        host_ptr = tmp_buffer.data(); -    } +    // Use an extra temporal buffer +    auto& tmp_buffer = staging_cache.GetBuffer(1); +    tmp_buffer.resize(guest_memory_size); +    host_ptr = tmp_buffer.data();      if (params.is_tiled) {          ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); @@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,              }          }      } -    if (!is_continuous) { -        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -    } +    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);  }  } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d7882a031..a39a8661b 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -68,8 +68,8 @@ public:          return gpu_addr;      } -    bool Overlaps(const CacheAddr start, const CacheAddr end) const { -        return (cache_addr < end) && (cache_addr_end > start); +    bool Overlaps(const VAddr start, 
const VAddr end) const { +        return (cpu_addr < end) && (cpu_addr_end > start);      }      bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { @@ -86,21 +86,13 @@ public:          return cpu_addr;      } -    void SetCpuAddr(const VAddr new_addr) { -        cpu_addr = new_addr; -    } - -    CacheAddr GetCacheAddr() const { -        return cache_addr; -    } - -    CacheAddr GetCacheAddrEnd() const { -        return cache_addr_end; +    VAddr GetCpuAddrEnd() const { +        return cpu_addr_end;      } -    void SetCacheAddr(const CacheAddr new_addr) { -        cache_addr = new_addr; -        cache_addr_end = new_addr + guest_memory_size; +    void SetCpuAddr(const VAddr new_addr) { +        cpu_addr = new_addr; +        cpu_addr_end = new_addr + guest_memory_size;      }      const SurfaceParams& GetSurfaceParams() const { @@ -119,14 +111,6 @@ public:          return mipmap_sizes[level];      } -    void MarkAsContinuous(const bool is_continuous) { -        this->is_continuous = is_continuous; -    } - -    bool IsContinuous() const { -        return is_continuous; -    } -      bool IsLinear() const {          return !params.is_tiled;      } @@ -175,10 +159,8 @@ protected:      std::size_t guest_memory_size;      std::size_t host_memory_size;      GPUVAddr gpu_addr{}; -    CacheAddr cache_addr{}; -    CacheAddr cache_addr_end{};      VAddr cpu_addr{}; -    bool is_continuous{}; +    VAddr cpu_addr_end{};      bool is_converted{};      std::vector<std::size_t> mipmap_sizes; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8f8d659d..88fe3e25f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;  template <typename TSurface, typename TView>  class TextureCache { -    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; -    using IntervalType = typename IntervalMap::interval_type;  public: -    void InvalidateRegion(CacheAddr addr, std::size_t size) { +    void InvalidateRegion(VAddr addr, std::size_t size) {          std::lock_guard lock{mutex};          for (const auto& surface : GetSurfacesInRegion(addr, size)) { @@ -76,7 +74,7 @@ public:          guard_samplers = new_guard;      } -    void FlushRegion(CacheAddr addr, std::size_t size) { +    void FlushRegion(VAddr addr, std::size_t size) {          std::lock_guard lock{mutex};          auto surfaces = GetSurfacesInRegion(addr, size); @@ -99,9 +97,9 @@ public:              return GetNullSurface(SurfaceParams::ExpectedTarget(entry));          } -        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; -        const auto cache_addr{ToCacheAddr(host_ptr)}; -        if (!cache_addr) { +        const std::optional<VAddr> cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); +        if (!cpu_addr) {              return GetNullSurface(SurfaceParams::ExpectedTarget(entry));          } @@ -110,7 +108,7 @@ public:          }          const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; -        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); +        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);          if (guard_samplers) {              sampled_textures.push_back(surface);          } @@ -124,13 +122,13 @@ public:          if 
(!gpu_addr) {              return GetNullSurface(SurfaceParams::ExpectedTarget(entry));          } -        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; -        const auto cache_addr{ToCacheAddr(host_ptr)}; -        if (!cache_addr) { +        const std::optional<VAddr> cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); +        if (!cpu_addr) {              return GetNullSurface(SurfaceParams::ExpectedTarget(entry));          }          const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; -        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); +        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);          if (guard_samplers) {              sampled_textures.push_back(surface);          } @@ -159,14 +157,14 @@ public:              SetEmptyDepthBuffer();              return {};          } -        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; -        const auto cache_addr{ToCacheAddr(host_ptr)}; -        if (!cache_addr) { +        const std::optional<VAddr> cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); +        if (!cpu_addr) {              SetEmptyDepthBuffer();              return {};          }          const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; -        auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); +        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);          if (depth_buffer.target)              depth_buffer.target->MarkAsRenderTarget(false, NO_RT);          depth_buffer.target = surface_view.first; @@ -199,15 +197,15 @@ public:              return {};          } -        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; -        const auto cache_addr{ToCacheAddr(host_ptr)}; -        if (!cache_addr) { +        const std::optional<VAddr> cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); +        if (!cpu_addr) {              SetEmptyColorBuffer(index);              return {};          }          auto surface_view = -            GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), +            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),                         preserve_contents, true);          if (render_targets[index].target)              render_targets[index].target->MarkAsRenderTarget(false, NO_RT); @@ -257,27 +255,26 @@ public:          const GPUVAddr src_gpu_addr = src_config.Address();          const GPUVAddr dst_gpu_addr = dst_config.Address();          DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); -        const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; -        const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; -        const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; -        const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; +        const std::optional<VAddr> dst_cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); +        const std::optional<VAddr> src_cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);          std::pair<TSurface, TView> dst_surface = -            GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); +            
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);          std::pair<TSurface, TView> src_surface = -            GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); +            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);          ImageBlit(src_surface.second, dst_surface.second, copy_config);          dst_surface.first->MarkAsModified(true, Tick());      } -    TSurface TryFindFramebufferSurface(const u8* host_ptr) { -        const CacheAddr cache_addr = ToCacheAddr(host_ptr); -        if (!cache_addr) { +    TSurface TryFindFramebufferSurface(VAddr addr) { +        if (!addr) {              return nullptr;          } -        const CacheAddr page = cache_addr >> registry_page_bits; +        const VAddr page = addr >> registry_page_bits;          std::vector<TSurface>& list = registry[page];          for (auto& surface : list) { -            if (surface->GetCacheAddr() == cache_addr) { +            if (surface->GetCpuAddr() == addr) {                  return surface;              }          } @@ -338,18 +335,14 @@ protected:      void Register(TSurface surface) {          const GPUVAddr gpu_addr = surface->GetGpuAddr(); -        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));          const std::size_t size = surface->GetSizeInBytes();          const std::optional<VAddr> cpu_addr =              system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); -        if (!cache_ptr || !cpu_addr) { +        if (!cpu_addr) {              LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",                           gpu_addr);              return;          } -        const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); -        surface->MarkAsContinuous(continuous); -        surface->SetCacheAddr(cache_ptr);          surface->SetCpuAddr(*cpu_addr);          RegisterInnerCache(surface);          surface->MarkAsRegistered(true); @@ -634,7 +627,7 @@ private:      std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,                                                                 const SurfaceParams& params,                                                                 const GPUVAddr gpu_addr, -                                                               const CacheAddr cache_addr, +                                                               const VAddr cpu_addr,                                                                 bool preserve_contents) {          if (params.target == SurfaceTarget::Texture3D) {              bool failed = false; @@ -659,7 +652,7 @@ private:                      failed = true;                      break;                  } -                const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); +                const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);                  const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);                  modified |= surface->IsModified();                  const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, @@ -679,7 +672,7 @@ private:          } else {              for (const auto& surface : overlaps) {                  if (!surface->MatchTarget(params.target)) { -                    if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { +                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {                     
     if (Settings::values.use_accurate_gpu_emulation) {                              return std::nullopt;                          } @@ -688,7 +681,7 @@ private:                      }                      return std::nullopt;                  } -                if (surface->GetCacheAddr() != cache_addr) { +                if (surface->GetCpuAddr() != cpu_addr) {                      continue;                  }                  if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { @@ -722,13 +715,13 @@ private:       *                          left blank.       * @param is_render         Whether or not the surface is a render target.       **/ -    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, +    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,                                            const SurfaceParams& params, bool preserve_contents,                                            bool is_render) {          // Step 1          // Check Level 1 Cache for a fast structural match. If candidate surface          // matches at certain level we are pretty much done. -        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { +        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {              TSurface& current_surface = iter->second;              const auto topological_result = current_surface->MatchesTopology(params);              if (topological_result != MatchTopologyResult::FullMatch) { @@ -755,7 +748,7 @@ private:          // Step 2          // Obtain all possible overlaps in the memory region          const std::size_t candidate_size = params.GetGuestSizeInBytes(); -        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; +        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};          // If none are found, we are done. we just load the surface and create it.          if (overlaps.empty()) { @@ -777,7 +770,7 @@ private:          // Check if it's a 3D texture          if (params.block_depth > 0) {              auto surface = -                Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); +                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);              if (surface) {                  return *surface;              } @@ -852,16 +845,16 @@ private:       * @param params   The parameters on the candidate surface.       
**/      Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { -        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; -        const auto cache_addr{ToCacheAddr(host_ptr)}; +        const std::optional<VAddr> cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); -        if (!cache_addr) { +        if (!cpu_addr) {              Deduction result{};              result.type = DeductionType::DeductionFailed;              return result;          } -        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { +        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {              TSurface& current_surface = iter->second;              const auto topological_result = current_surface->MatchesTopology(params);              if (topological_result != MatchTopologyResult::FullMatch) { @@ -880,7 +873,7 @@ private:          }          const std::size_t candidate_size = params.GetGuestSizeInBytes(); -        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; +        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};          if (overlaps.empty()) {              Deduction result{}; @@ -1024,10 +1017,10 @@ private:      }      void RegisterInnerCache(TSurface& surface) { -        const CacheAddr cache_addr = surface->GetCacheAddr(); -        CacheAddr start = cache_addr >> registry_page_bits; -        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; -        l1_cache[cache_addr] = surface; +        const VAddr cpu_addr = surface->GetCpuAddr(); +        VAddr start = cpu_addr >> registry_page_bits; +        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; +        l1_cache[cpu_addr] = surface;          while (start <= end) {              registry[start].push_back(surface);              start++; @@ -1035,10 +1028,10 @@ private:      }      void UnregisterInnerCache(TSurface& surface) { -        const CacheAddr cache_addr = surface->GetCacheAddr(); -        CacheAddr start = cache_addr >> registry_page_bits; -        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; -        l1_cache.erase(cache_addr); +        const VAddr cpu_addr = surface->GetCpuAddr(); +        VAddr start = cpu_addr >> registry_page_bits; +        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; +        l1_cache.erase(cpu_addr);          while (start <= end) {              auto& reg{registry[start]};              reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -1046,18 +1039,18 @@ private:          }      } -    std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { +    std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {          if (size == 0) {              return {};          } -        const CacheAddr cache_addr_end = cache_addr + size; -        CacheAddr start = cache_addr >> registry_page_bits; -        const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; +        const VAddr cpu_addr_end = cpu_addr + size; +        VAddr start = cpu_addr >> registry_page_bits; +        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;          std::vector<TSurface> surfaces;          while (start <= end) {              std::vector<TSurface>& list = registry[start];              for (auto& surface : list) { -                if (!surface->IsPicked() && surface->Overlaps(cache_addr, 
cache_addr_end)) { +                if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {                      surface->MarkAsPicked(true);                      surfaces.push_back(surface);                  } @@ -1146,14 +1139,14 @@ private:      // large in size.      static constexpr u64 registry_page_bits{20};      static constexpr u64 registry_page_size{1 << registry_page_bits}; -    std::unordered_map<CacheAddr, std::vector<TSurface>> registry; +    std::unordered_map<VAddr, std::vector<TSurface>> registry;      static constexpr u32 DEPTH_RT = 8;      static constexpr u32 NO_RT = 0xFFFFFFFF;      // The L1 Cache is used for fast texture lookup before checking the overlaps      // This avoids calculating size and other stuffs. -    std::unordered_map<CacheAddr, TSurface> l1_cache; +    std::unordered_map<VAddr, TSurface> l1_cache;      /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have      /// previously been used. This is to prevent surfaces from being constantly created and | 
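
The pattern repeated across these hunks is that every cache lookup now starts from a guest CPU virtual address: the GPU address is translated with `GpuToCpuAddress`, which returns `std::optional<VAddr>`, the caller bails out (empty buffer, null surface, null shader) when the mapping is missing, and only on a cache miss is the host pointer fetched to read the guest data. The sketch below is a minimal, self-contained illustration of that shape; the `Toy*` types and their members are hypothetical stand-ins, not yuzu's actual `MemoryManager` or shader-cache API.

```cpp
#include <cstdint>
#include <memory>
#include <optional>
#include <unordered_map>

using VAddr = std::uint64_t;    // guest CPU virtual address
using GPUVAddr = std::uint64_t; // GPU virtual address

// Hypothetical stand-in for the real memory manager's translation interface.
struct ToyMemoryManager {
    std::unordered_map<GPUVAddr, VAddr> mappings;

    // Mirrors the shape of GpuToCpuAddress: empty optional when unmapped.
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) const {
        const auto it = mappings.find(gpu_addr);
        if (it == mappings.end()) {
            return std::nullopt;
        }
        return it->second;
    }
};

struct ToyShader {
    VAddr cpu_addr{};
};

class ToyShaderCache {
public:
    explicit ToyShaderCache(ToyMemoryManager& memory_manager) : memory_manager{memory_manager} {}

    // Look up (or create) a shader keyed by its guest CPU address.
    std::shared_ptr<ToyShader> GetShader(GPUVAddr gpu_addr) {
        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            return nullptr; // unmapped: nothing to cache
        }
        if (auto shader = TryGet(*cpu_addr)) {
            return shader; // cache hit keyed by VAddr, no host pointer needed
        }
        auto shader = std::make_shared<ToyShader>(ToyShader{*cpu_addr});
        cache.emplace(*cpu_addr, shader);
        return shader;
    }

private:
    std::shared_ptr<ToyShader> TryGet(VAddr cpu_addr) const {
        const auto it = cache.find(cpu_addr);
        return it != cache.end() ? it->second : nullptr;
    }

    ToyMemoryManager& memory_manager;
    std::unordered_map<VAddr, std::shared_ptr<ToyShader>> cache;
};
```

This is also why the Vulkan rasterizer's `FlushRegion`/`InvalidateRegion` gained early returns for a zero address or size: with `VAddr` keying, an address of 0 plays the role the null host pointer used to.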
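
The texture cache's region queries combine two pieces that this diff ports from `CacheAddr` to `VAddr` arithmetic: a half-open interval overlap test (`cpu_addr < end && cpu_addr_end > start`) and a registry bucketed by 2^20-byte pages (`registry_page_bits = 20`), so a range query only walks the pages the range touches. Here is a sketch under those assumptions; the class and field names are illustrative, not the real `TextureCache`.

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;

// Matches the constant in the diff: 1 MiB registry pages.
constexpr std::uint64_t registry_page_bits = 20;

struct ToySurface {
    VAddr cpu_addr{};
    VAddr cpu_addr_end{};
    bool picked = false;

    // Half-open interval overlap, as in BufferBlock::Overlaps / SurfaceBaseImpl::Overlaps.
    bool Overlaps(VAddr start, VAddr end) const {
        return cpu_addr < end && cpu_addr_end > start;
    }
};

class ToyRegistry {
public:
    void Register(std::shared_ptr<ToySurface> surface) {
        VAddr page = surface->cpu_addr >> registry_page_bits;
        const VAddr last_page = (surface->cpu_addr_end - 1) >> registry_page_bits;
        for (; page <= last_page; ++page) {
            registry[page].push_back(surface);
        }
    }

    // Collect every registered surface overlapping [cpu_addr, cpu_addr + size).
    std::vector<std::shared_ptr<ToySurface>> GetSurfacesInRegion(VAddr cpu_addr, std::size_t size) {
        std::vector<std::shared_ptr<ToySurface>> result;
        if (size == 0) {
            return result;
        }
        const VAddr cpu_addr_end = cpu_addr + size;
        VAddr page = cpu_addr >> registry_page_bits;
        const VAddr last_page = (cpu_addr_end - 1) >> registry_page_bits;
        for (; page <= last_page; ++page) {
            for (auto& surface : registry[page]) {
                // The "picked" flag avoids duplicates for surfaces spanning several pages.
                if (!surface->picked && surface->Overlaps(cpu_addr, cpu_addr_end)) {
                    surface->picked = true;
                    result.push_back(surface);
                }
            }
        }
        for (auto& surface : result) {
            surface->picked = false;
        }
        return result;
    }

private:
    std::unordered_map<VAddr, std::vector<std::shared_ptr<ToySurface>>> registry;
};
```

The separate `l1_cache` map in the diff sits in front of this paged registry: it is an exact-address lookup keyed by the surface's starting `VAddr`, tried first so the common case never has to compute sizes or walk pages.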
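
A side effect of dropping host-pointer tracking shows up in `surface_base.cpp`: the `IsBlockContinuous` fast path (reading or writing guest texture memory directly through a host pointer) is gone, and uploads/downloads now always stage through a temporary buffer using `ReadBlockUnsafe`/`WriteBlockUnsafe`. A minimal sketch of that staging shape, with `ToyGpuMemory` and `LoadGuestTexture` as hypothetical stand-ins rather than the real `Tegra::MemoryManager` or `SurfaceBaseImpl` interfaces:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

using GPUVAddr = std::uint64_t;

// Hypothetical stand-in for the GPU memory manager's block-copy interface.
struct ToyGpuMemory {
    std::vector<std::uint8_t> backing;

    void ReadBlockUnsafe(GPUVAddr gpu_addr, void* dest, std::size_t size) const {
        std::memcpy(dest, backing.data() + gpu_addr, size); // assumes the range is in bounds
    }
    void WriteBlockUnsafe(GPUVAddr gpu_addr, const void* src, std::size_t size) {
        std::memcpy(backing.data() + gpu_addr, src, size); // assumes the range is in bounds
    }
};

// After the refactor, texture loads always copy guest memory into a staging
// buffer first, regardless of whether the GPU mapping is physically contiguous.
void LoadGuestTexture(const ToyGpuMemory& memory, GPUVAddr gpu_addr,
                      std::size_t guest_memory_size, std::vector<std::uint8_t>& staging) {
    staging.resize(guest_memory_size);
    memory.ReadBlockUnsafe(gpu_addr, staging.data(), guest_memory_size);
    // ...deswizzle/convert from `staging` into the host texture here...
}
```

The trade-off is an extra copy for contiguous mappings in exchange for removing the `is_continuous` bookkeeping that depended on stable host pointers.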
