diff options
| author | bunnei <bunneidev@gmail.com> | 2019-08-29 13:07:01 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-08-29 13:07:01 -0400 | 
| commit | e4246158393fc011b16a4c1ef023c167cde9d4f8 (patch) | |
| tree | 461f4e0a45a1d44bc2f81a3ef7f35cb10cece680 /src/video_core | |
| parent | f8cc5668f80d0c63f5ce850286760807462e1d72 (diff) | |
| parent | 83ec2091c1836bf32e9070d0ddf2a53288871d69 (diff) | |
Merge pull request #2783 from FernandoS27/new-buffer-cache
Implement a New LLE Buffer Cache
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/buffer_cache.h | 299 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_block.h | 77 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 449 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/map_interval.h | 89 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 51 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 39 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | 
9 files changed, 684 insertions, 330 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f315e021d..e2f85c5f1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,5 +1,7 @@  add_library(video_core STATIC -    buffer_cache.h +    buffer_cache/buffer_block.h +    buffer_cache/buffer_cache.h +    buffer_cache/map_interval.h      dma_pusher.cpp      dma_pusher.h      debug_utils/debug_utils.cpp diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h deleted file mode 100644 index 6f868b8b4..000000000 --- a/src/video_core/buffer_cache.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <memory> -#include <mutex> -#include <unordered_map> -#include <unordered_set> -#include <utility> -#include <vector> - -#include "common/alignment.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_cache.h" - -namespace VideoCore { -class RasterizerInterface; -} - -namespace VideoCommon { - -template <typename BufferStorageType> -class CachedBuffer final : public RasterizerCacheObject { -public: -    explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) -        : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} -    ~CachedBuffer() override = default; - -    VAddr GetCpuAddr() const override { -        return cpu_addr; -    } - -    std::size_t GetSizeInBytes() const override { -        return size; -    } - -    u8* GetWritableHostPtr() const { -        return host_ptr; -    } - -    std::size_t GetSize() const { -        return size; -    } - -    std::size_t GetCapacity() const { -        return capacity; -    } - -    bool IsInternalized() const { -        return is_internal; -    } - -    const BufferStorageType& GetBuffer() const { -        return buffer; -    } - -    void SetSize(std::size_t new_size) { -        size = new_size; -    } - -    void SetInternalState(bool is_internal_) { -        is_internal = is_internal_; -    } - -    BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { -        capacity = new_capacity; -        std::swap(buffer, buffer_); -        return buffer_; -    } - -private: -    u8* host_ptr{}; -    VAddr cpu_addr{}; -    std::size_t size{}; -    std::size_t capacity{}; -    bool is_internal{}; -    BufferStorageType buffer; -}; - -template <typename BufferStorageType, typename BufferType, typename StreamBuffer> -class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { -public: -    using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; -    using BufferInfo = std::pair<const BufferType*, u64>; - -    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, -                         std::unique_ptr<StreamBuffer> stream_buffer) -        : RasterizerCache<Buffer>{rasterizer}, system{system}, -          stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ -                                                       this->stream_buffer->GetHandle()} {} -    ~BufferCache() = default; - -    void Unregister(const Buffer& entry) override { -        std::lock_guard lock{RasterizerCache<Buffer>::mutex}; -        if (entry->IsInternalized()) { -            internalized_entries.erase(entry->GetCacheAddr()); -        } -        ReserveBuffer(entry); -        RasterizerCache<Buffer>::Unregister(entry); -    } - -    void TickFrame() { -        marked_for_destruction_index = -            (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); -        MarkedForDestruction().clear(); -    } - -    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, -                            bool internalize = false, bool is_written = false) { -        std::lock_guard lock{RasterizerCache<Buffer>::mutex}; - -        auto& memory_manager = system.GPU().MemoryManager(); -        const auto host_ptr = memory_manager.GetPointer(gpu_addr); -        if (!host_ptr) { -            return {GetEmptyBuffer(size), 0}; -        } -        const auto cache_addr = ToCacheAddr(host_ptr); - -        // Cache management is a big overhead, so only cache entries with a given size. -        // TODO: Figure out which size is the best for given games. -        constexpr std::size_t max_stream_size = 0x800; -        if (!internalize && size < max_stream_size && -            internalized_entries.find(cache_addr) == internalized_entries.end()) { -            return StreamBufferUpload(host_ptr, size, alignment); -        } - -        auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); -        if (!entry) { -            return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); -        } - -        if (entry->GetSize() < size) { -            IncreaseBufferSize(entry, size); -        } -        if (is_written) { -            entry->MarkAsModified(true, *this); -        } -        return {ToHandle(entry->GetBuffer()), 0}; -    } - -    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. -    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, -                                std::size_t alignment = 4) { -        std::lock_guard lock{RasterizerCache<Buffer>::mutex}; -        return StreamBufferUpload(raw_pointer, size, alignment); -    } - -    void Map(std::size_t max_size) { -        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); -        buffer_offset = buffer_offset_base; -    } - -    /// Finishes the upload stream, returns true on bindings invalidation. -    bool Unmap() { -        stream_buffer->Unmap(buffer_offset - buffer_offset_base); -        return std::exchange(invalidated, false); -    } - -    virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; - -protected: -    void FlushObjectInner(const Buffer& entry) override { -        DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); -    } - -    virtual BufferStorageType CreateBuffer(std::size_t size) = 0; - -    virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; - -    virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, -                                  std::size_t size, const u8* data) = 0; - -    virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, -                                    std::size_t size, u8* data) = 0; - -    virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, -                                std::size_t src_offset, std::size_t dst_offset, -                                std::size_t size) = 0; - -private: -    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, -                                  std::size_t alignment) { -        AlignBuffer(alignment); -        const std::size_t uploaded_offset = buffer_offset; -        std::memcpy(buffer_ptr, raw_pointer, size); - -        buffer_ptr += size; -        buffer_offset += size; -        return {&stream_buffer_handle, uploaded_offset}; -    } - -    BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, -                                 bool internalize, bool is_written) { -        auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); -        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); -        ASSERT(cpu_addr); - -        auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); -        entry->SetSize(size); -        entry->SetInternalState(internalize); -        RasterizerCache<Buffer>::Register(entry); - -        if (internalize) { -            internalized_entries.emplace(ToCacheAddr(host_ptr)); -        } -        if (is_written) { -            entry->MarkAsModified(true, *this); -        } - -        if (entry->GetCapacity() < size) { -            MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); -        } - -        UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); -        return {ToHandle(entry->GetBuffer()), 0}; -    } - -    void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { -        const std::size_t old_size = entry->GetSize(); -        if (entry->GetCapacity() < new_size) { -            const auto& old_buffer = entry->GetBuffer(); -            auto new_buffer = CreateBuffer(new_size); - -            // Copy bits from the old buffer to the new buffer. -            CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); -            MarkedForDestruction().push_back( -                entry->ExchangeBuffer(std::move(new_buffer), new_size)); - -            // This buffer could have been used -            invalidated = true; -        } -        // Upload the new bits. -        const std::size_t size_diff = new_size - old_size; -        UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); - -        // Update entry's size in the object and in the cache. -        Unregister(entry); - -        entry->SetSize(new_size); -        RasterizerCache<Buffer>::Register(entry); -    } - -    Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { -        if (auto entry = TryGetReservedBuffer(host_ptr)) { -            return entry; -        } -        return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); -    } - -    Buffer TryGetReservedBuffer(u8* host_ptr) { -        const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); -        if (it == buffer_reserve.end()) { -            return {}; -        } -        auto& reserve = it->second; -        auto entry = reserve.back(); -        reserve.pop_back(); -        return entry; -    } - -    void ReserveBuffer(Buffer entry) { -        buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); -    } - -    void AlignBuffer(std::size_t alignment) { -        // Align the offset, not the mapped pointer -        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); -        buffer_ptr += offset_aligned - buffer_offset; -        buffer_offset = offset_aligned; -    } - -    std::vector<BufferStorageType>& MarkedForDestruction() { -        return marked_for_destruction_ring_buffer[marked_for_destruction_index]; -    } - -    Core::System& system; - -    std::unique_ptr<StreamBuffer> stream_buffer; -    BufferType stream_buffer_handle{}; - -    bool invalidated = false; - -    u8* buffer_ptr = nullptr; -    u64 buffer_offset = 0; -    u64 buffer_offset_base = 0; - -    std::size_t marked_for_destruction_index = 0; -    std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; - -    std::unordered_set<CacheAddr> internalized_entries; -    std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; -}; - -} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h new file mode 100644 index 000000000..d2124443f --- /dev/null +++ b/src/video_core/buffer_cache/buffer_block.h @@ -0,0 +1,77 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_set> +#include <utility> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "video_core/gpu.h" + +namespace VideoCommon { + +class BufferBlock { +public: +    bool Overlaps(const CacheAddr start, const CacheAddr end) const { +        return (cache_addr < end) && (cache_addr_end > start); +    } + +    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { +        return cache_addr <= other_start && other_end <= cache_addr_end; +    } + +    u8* GetWritableHostPtr() const { +        return FromCacheAddr(cache_addr); +    } + +    u8* GetWritableHostPtr(std::size_t offset) const { +        return FromCacheAddr(cache_addr + offset); +    } + +    std::size_t GetOffset(const CacheAddr in_addr) { +        return static_cast<std::size_t>(in_addr - cache_addr); +    } + +    CacheAddr GetCacheAddr() const { +        return cache_addr; +    } + +    CacheAddr GetCacheAddrEnd() const { +        return cache_addr_end; +    } + +    void SetCacheAddr(const CacheAddr new_addr) { +        cache_addr = new_addr; +        cache_addr_end = new_addr + size; +    } + +    std::size_t GetSize() const { +        return size; +    } + +    void SetEpoch(u64 new_epoch) { +        epoch = new_epoch; +    } + +    u64 GetEpoch() { +        return epoch; +    } + +protected: +    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { +        SetCacheAddr(cache_addr); +    } +    ~BufferBlock() = default; + +private: +    CacheAddr cache_addr{}; +    CacheAddr cache_addr_end{}; +    u64 pages{}; +    std::size_t size{}; +    u64 epoch{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..38ce16ed5 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -0,0 +1,449 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <mutex> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/buffer_cache/buffer_block.h" +#include "video_core/buffer_cache/map_interval.h" +#include "video_core/memory_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +using MapInterval = std::shared_ptr<MapIntervalBase>; + +template <typename TBuffer, typename TBufferType, typename StreamBuffer> +class BufferCache { +public: +    using BufferInfo = std::pair<const TBufferType*, u64>; + +    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, +                            bool is_written = false) { +        std::lock_guard lock{mutex}; + +        auto& memory_manager = system.GPU().MemoryManager(); +        const auto host_ptr = memory_manager.GetPointer(gpu_addr); +        if (!host_ptr) { +            return {GetEmptyBuffer(size), 0}; +        } +        const auto cache_addr = ToCacheAddr(host_ptr); + +        // Cache management is a big overhead, so only cache entries with a given size. +        // TODO: Figure out which size is the best for given games. +        constexpr std::size_t max_stream_size = 0x800; +        if (size < max_stream_size) { +            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { +                return StreamBufferUpload(host_ptr, size, alignment); +            } +        } + +        auto block = GetBlock(cache_addr, size); +        auto map = MapAddress(block, gpu_addr, cache_addr, size); +        if (is_written) { +            map->MarkAsModified(true, GetModifiedTicks()); +            if (!map->IsWritten()) { +                map->MarkAsWritten(true); +                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); +            } +        } else { +            if (map->IsWritten()) { +                WriteBarrier(); +            } +        } + +        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); + +        return {ToHandle(block), offset}; +    } + +    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. +    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, +                                std::size_t alignment = 4) { +        std::lock_guard lock{mutex}; +        return StreamBufferUpload(raw_pointer, size, alignment); +    } + +    void Map(std::size_t max_size) { +        std::lock_guard lock{mutex}; + +        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); +        buffer_offset = buffer_offset_base; +    } + +    /// Finishes the upload stream, returns true on bindings invalidation. +    bool Unmap() { +        std::lock_guard lock{mutex}; + +        stream_buffer->Unmap(buffer_offset - buffer_offset_base); +        return std::exchange(invalidated, false); +    } + +    void TickFrame() { +        ++epoch; +        while (!pending_destruction.empty()) { +            if (pending_destruction.front()->GetEpoch() + 1 > epoch) { +                break; +            } +            pending_destruction.pop_front(); +        } +    } + +    /// Write any cached resources overlapping the specified region back to memory +    void FlushRegion(CacheAddr addr, std::size_t size) { +        std::lock_guard lock{mutex}; + +        std::vector<MapInterval> objects = GetMapsInRange(addr, size); +        std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { +            return a->GetModificationTick() < b->GetModificationTick(); +        }); +        for (auto& object : objects) { +            if (object->IsModified() && object->IsRegistered()) { +                FlushMap(object); +            } +        } +    } + +    /// Mark the specified region as being invalidated +    void InvalidateRegion(CacheAddr addr, u64 size) { +        std::lock_guard lock{mutex}; + +        std::vector<MapInterval> objects = GetMapsInRange(addr, size); +        for (auto& object : objects) { +            if (object->IsRegistered()) { +                Unregister(object); +            } +        } +    } + +    virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; + +protected: +    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, +                         std::unique_ptr<StreamBuffer> stream_buffer) +        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, +          stream_buffer_handle{this->stream_buffer->GetHandle()} {} + +    ~BufferCache() = default; + +    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; + +    virtual void WriteBarrier() = 0; + +    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; + +    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, +                                 const u8* data) = 0; + +    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, +                                   u8* data) = 0; + +    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, +                           std::size_t dst_offset, std::size_t size) = 0; + +    /// Register an object into the cache +    void Register(const MapInterval& new_map, bool inherit_written = false) { +        const CacheAddr cache_ptr = new_map->GetStart(); +        const std::optional<VAddr> cpu_addr = +            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); +        if (!cache_ptr || !cpu_addr) { +            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", +                         new_map->GetGpuAddress()); +            return; +        } +        const std::size_t size = new_map->GetEnd() - new_map->GetStart(); +        new_map->SetCpuAddress(*cpu_addr); +        new_map->MarkAsRegistered(true); +        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; +        mapped_addresses.insert({interval, new_map}); +        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); +        if (inherit_written) { +            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); +            new_map->MarkAsWritten(true); +        } +    } + +    /// Unregisters an object from the cache +    void Unregister(MapInterval& map) { +        const std::size_t size = map->GetEnd() - map->GetStart(); +        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); +        map->MarkAsRegistered(false); +        if (map->IsWritten()) { +            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); +        } +        const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; +        mapped_addresses.erase(delete_interval); +    } + +private: +    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { +        return std::make_shared<MapIntervalBase>(start, end, gpu_addr); +    } + +    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, +                           const CacheAddr cache_addr, const std::size_t size) { + +        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); +        if (overlaps.empty()) { +            const CacheAddr cache_addr_end = cache_addr + size; +            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); +            u8* host_ptr = FromCacheAddr(cache_addr); +            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); +            Register(new_map); +            return new_map; +        } + +        const CacheAddr cache_addr_end = cache_addr + size; +        if (overlaps.size() == 1) { +            MapInterval& current_map = overlaps[0]; +            if (current_map->IsInside(cache_addr, cache_addr_end)) { +                return current_map; +            } +        } +        CacheAddr new_start = cache_addr; +        CacheAddr new_end = cache_addr_end; +        bool write_inheritance = false; +        bool modified_inheritance = false; +        // Calculate new buffer parameters +        for (auto& overlap : overlaps) { +            new_start = std::min(overlap->GetStart(), new_start); +            new_end = std::max(overlap->GetEnd(), new_end); +            write_inheritance |= overlap->IsWritten(); +            modified_inheritance |= overlap->IsModified(); +        } +        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; +        for (auto& overlap : overlaps) { +            Unregister(overlap); +        } +        UpdateBlock(block, new_start, new_end, overlaps); +        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); +        if (modified_inheritance) { +            new_map->MarkAsModified(true, GetModifiedTicks()); +        } +        Register(new_map, write_inheritance); +        return new_map; +    } + +    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, +                     std::vector<MapInterval>& overlaps) { +        const IntervalType base_interval{start, end}; +        IntervalSet interval_set{}; +        interval_set.add(base_interval); +        for (auto& overlap : overlaps) { +            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; +            interval_set.subtract(subtract); +        } +        for (auto& interval : interval_set) { +            std::size_t size = interval.upper() - interval.lower(); +            if (size > 0) { +                u8* host_ptr = FromCacheAddr(interval.lower()); +                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); +            } +        } +    } + +    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { +        if (size == 0) { +            return {}; +        } + +        std::vector<MapInterval> objects{}; +        const IntervalType interval{addr, addr + size}; +        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { +            objects.push_back(pair.second); +        } + +        return objects; +    } + +    /// Returns a ticks counter used for tracking when cached objects were last modified +    u64 GetModifiedTicks() { +        return ++modified_ticks; +    } + +    void FlushMap(MapInterval map) { +        std::size_t size = map->GetEnd() - map->GetStart(); +        TBuffer block = blocks[map->GetStart() >> block_page_bits]; +        u8* host_ptr = FromCacheAddr(map->GetStart()); +        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); +        map->MarkAsModified(false, 0); +    } + +    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, +                                  std::size_t alignment) { +        AlignBuffer(alignment); +        const std::size_t uploaded_offset = buffer_offset; +        std::memcpy(buffer_ptr, raw_pointer, size); + +        buffer_ptr += size; +        buffer_offset += size; +        return {&stream_buffer_handle, uploaded_offset}; +    } + +    void AlignBuffer(std::size_t alignment) { +        // Align the offset, not the mapped pointer +        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); +        buffer_ptr += offset_aligned - buffer_offset; +        buffer_offset = offset_aligned; +    } + +    TBuffer EnlargeBlock(TBuffer buffer) { +        const std::size_t old_size = buffer->GetSize(); +        const std::size_t new_size = old_size + block_page_size; +        const CacheAddr cache_addr = buffer->GetCacheAddr(); +        TBuffer new_buffer = CreateBlock(cache_addr, new_size); +        CopyBlock(buffer, new_buffer, 0, 0, old_size); +        buffer->SetEpoch(epoch); +        pending_destruction.push_back(buffer); +        const CacheAddr cache_addr_end = cache_addr + new_size - 1; +        u64 page_start = cache_addr >> block_page_bits; +        const u64 page_end = cache_addr_end >> block_page_bits; +        while (page_start <= page_end) { +            blocks[page_start] = new_buffer; +            ++page_start; +        } +        return new_buffer; +    } + +    TBuffer MergeBlocks(TBuffer first, TBuffer second) { +        const std::size_t size_1 = first->GetSize(); +        const std::size_t size_2 = second->GetSize(); +        const CacheAddr first_addr = first->GetCacheAddr(); +        const CacheAddr second_addr = second->GetCacheAddr(); +        const CacheAddr new_addr = std::min(first_addr, second_addr); +        const std::size_t new_size = size_1 + size_2; +        TBuffer new_buffer = CreateBlock(new_addr, new_size); +        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); +        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); +        first->SetEpoch(epoch); +        second->SetEpoch(epoch); +        pending_destruction.push_back(first); +        pending_destruction.push_back(second); +        const CacheAddr cache_addr_end = new_addr + new_size - 1; +        u64 page_start = new_addr >> block_page_bits; +        const u64 page_end = cache_addr_end >> block_page_bits; +        while (page_start <= page_end) { +            blocks[page_start] = new_buffer; +            ++page_start; +        } +        return new_buffer; +    } + +    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { +        TBuffer found{}; +        const CacheAddr cache_addr_end = cache_addr + size - 1; +        u64 page_start = cache_addr >> block_page_bits; +        const u64 page_end = cache_addr_end >> block_page_bits; +        const u64 num_pages = page_end - page_start + 1; +        while (page_start <= page_end) { +            auto it = blocks.find(page_start); +            if (it == blocks.end()) { +                if (found) { +                    found = EnlargeBlock(found); +                } else { +                    const CacheAddr start_addr = (page_start << block_page_bits); +                    found = CreateBlock(start_addr, block_page_size); +                    blocks[page_start] = found; +                } +            } else { +                if (found) { +                    if (found == it->second) { +                        ++page_start; +                        continue; +                    } +                    found = MergeBlocks(found, it->second); +                } else { +                    found = it->second; +                } +            } +            ++page_start; +        } +        return found; +    } + +    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { +        u64 page_start = start >> write_page_bit; +        const u64 page_end = end >> write_page_bit; +        while (page_start <= page_end) { +            auto it = written_pages.find(page_start); +            if (it != written_pages.end()) { +                it->second = it->second + 1; +            } else { +                written_pages[page_start] = 1; +            } +            page_start++; +        } +    } + +    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { +        u64 page_start = start >> write_page_bit; +        const u64 page_end = end >> write_page_bit; +        while (page_start <= page_end) { +            auto it = written_pages.find(page_start); +            if (it != written_pages.end()) { +                if (it->second > 1) { +                    it->second = it->second - 1; +                } else { +                    written_pages.erase(it); +                } +            } +            page_start++; +        } +    } + +    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { +        u64 page_start = start >> write_page_bit; +        const u64 page_end = end >> write_page_bit; +        while (page_start <= page_end) { +            if (written_pages.count(page_start) > 0) { +                return true; +            } +            page_start++; +        } +        return false; +    } + +    std::unique_ptr<StreamBuffer> stream_buffer; +    TBufferType stream_buffer_handle{}; + +    bool invalidated = false; + +    u8* buffer_ptr = nullptr; +    u64 buffer_offset = 0; +    u64 buffer_offset_base = 0; + +    using IntervalSet = boost::icl::interval_set<CacheAddr>; +    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; +    using IntervalType = typename IntervalCache::interval_type; +    IntervalCache mapped_addresses{}; + +    static constexpr u64 write_page_bit{11}; +    std::unordered_map<u64, u32> written_pages{}; + +    static constexpr u64 block_page_bits{21}; +    static constexpr u64 block_page_size{1 << block_page_bits}; +    std::unordered_map<u64, TBuffer> blocks{}; + +    std::list<TBuffer> pending_destruction{}; +    u64 epoch{}; +    u64 modified_ticks{}; +    VideoCore::RasterizerInterface& rasterizer; +    Core::System& system; +    std::recursive_mutex mutex; +}; + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h new file mode 100644 index 000000000..3a104d5cd --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.h @@ -0,0 +1,89 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "video_core/gpu.h" + +namespace VideoCommon { + +class MapIntervalBase { +public: +    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) +        : start{start}, end{end}, gpu_addr{gpu_addr} {} + +    void SetCpuAddress(VAddr new_cpu_addr) { +        cpu_addr = new_cpu_addr; +    } + +    VAddr GetCpuAddress() const { +        return cpu_addr; +    } + +    GPUVAddr GetGpuAddress() const { +        return gpu_addr; +    } + +    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { +        return (start <= other_start && other_end <= end); +    } + +    bool operator==(const MapIntervalBase& rhs) const { +        return std::tie(start, end) == std::tie(rhs.start, rhs.end); +    } + +    bool operator!=(const MapIntervalBase& rhs) const { +        return !operator==(rhs); +    } + +    void MarkAsRegistered(const bool registered) { +        is_registered = registered; +    } + +    bool IsRegistered() const { +        return is_registered; +    } + +    CacheAddr GetStart() const { +        return start; +    } + +    CacheAddr GetEnd() const { +        return end; +    } + +    void MarkAsModified(const bool is_modified_, const u64 tick) { +        is_modified = is_modified_; +        ticks = tick; +    } + +    bool IsModified() const { +        return is_modified; +    } + +    u64 GetModificationTick() const { +        return ticks; +    } + +    void MarkAsWritten(const bool is_written_) { +        is_written = is_written_; +    } + +    bool IsWritten() const { +        return is_written; +    } + +private: +    CacheAddr start; +    CacheAddr end; +    GPUVAddr gpu_addr; +    VAddr cpu_addr{}; +    bool is_written{}; +    bool is_modified{}; +    bool is_registered{}; +    u64 ticks{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 11857ff99..0baf2177c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) {      return reinterpret_cast<CacheAddr>(host_ptr);  } +inline u8* FromCacheAddr(CacheAddr cache_addr) { +    return reinterpret_cast<u8*>(cache_addr); +} +  namespace Core {  class System;  } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2a9b523f5..0781e6595 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,28 +7,40 @@  #include <glad/glad.h>  #include "common/assert.h" +#include "common/microprofile.h"  #include "video_core/renderer_opengl/gl_buffer_cache.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  namespace OpenGL { +MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); + +CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) +    : VideoCommon::BufferBlock{cache_addr, size} { +    gl_buffer.Create(); +    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); +} + +CachedBufferBlock::~CachedBufferBlock() = default; +  OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,                                 std::size_t stream_size) -    : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ +    : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{            rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}  OGLBufferCache::~OGLBufferCache() = default; -OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { -    OGLBuffer buffer; -    buffer.Create(); -    glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); -    return buffer; +Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { +    return std::make_shared<CachedBufferBlock>(cache_addr, size); +} + +void OGLBufferCache::WriteBarrier() { +    glMemoryBarrier(GL_ALL_BARRIER_BITS);  } -const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { -    return &buffer.handle; +const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { +    return buffer->GetHandle();  }  const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { @@ -36,23 +48,24 @@ const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {      return &null_buffer;  } -void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, -                                      const u8* data) { -    glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), +void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, +                                     const u8* data) { +    glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),                           static_cast<GLsizeiptr>(size), data);  } -void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, -                                        std::size_t size, u8* data) { -    glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), +void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, +                                       u8* data) { +    MICROPROFILE_SCOPE(OpenGL_Buffer_Download); +    glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),                              static_cast<GLsizeiptr>(size), data);  } -void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, -                                    std::size_t src_offset, std::size_t dst_offset, -                                    std::size_t size) { -    glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), -                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, +                               std::size_t dst_offset, std::size_t size) { +    glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), +                             static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), +                             static_cast<GLsizeiptr>(size));  }  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c8ac4038..022e7bfa9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,7 +7,7 @@  #include <memory>  #include "common/common_types.h" -#include "video_core/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache.h"  #include "video_core/rasterizer_cache.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -21,7 +21,24 @@ namespace OpenGL {  class OGLStreamBuffer;  class RasterizerOpenGL; -class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { +class CachedBufferBlock; + +using Buffer = std::shared_ptr<CachedBufferBlock>; + +class CachedBufferBlock : public VideoCommon::BufferBlock { +public: +    explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); +    ~CachedBufferBlock(); + +    const GLuint* GetHandle() const { +        return &gl_buffer.handle; +    } + +private: +    OGLBuffer gl_buffer{}; +}; + +class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {  public:      explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,                              std::size_t stream_size); @@ -30,18 +47,20 @@ public:      const GLuint* GetEmptyBuffer(std::size_t) override;  protected: -    OGLBuffer CreateBuffer(std::size_t size) override; +    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + +    void WriteBarrier() override; -    const GLuint* ToHandle(const OGLBuffer& buffer) override; +    const GLuint* ToHandle(const Buffer& buffer) override; -    void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, -                          const u8* data) override; +    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, +                         const u8* data) override; -    void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, -                            u8* data) override; +    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, +                           u8* data) override; -    void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, -                        std::size_t dst_offset, std::size_t size) override; +    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, +                   std::size_t dst_offset, std::size_t size) override;  };  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 80cfda7e4..019583718 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -980,7 +980,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr                                           GPUVAddr gpu_addr, std::size_t size) {      const auto alignment{device.GetShaderStorageBufferAlignment()};      const auto [ssbo, buffer_offset] = -        buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); +        buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());      bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));  } | 
