diff options
| author | bunnei <bunneidev@gmail.com> | 2018-06-26 16:14:14 -0400 | 
|---|---|---|
| committer | bunnei <bunneidev@gmail.com> | 2018-06-27 00:15:44 -0400 | 
| commit | 1dd754590fb9850bf00ddacbb860076dbbacabc6 (patch) | |
| tree | a628bb47bb9f3308c281b608ee6c347883553bf6 | |
| parent | 8af1ae46aa5a9303b21839b446d2ebf17ee12802 (diff) | |
gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces.
gl_rasterizer_cache: Improved cache management based on Citra's implementation.
gl_surface_cache: Add some docstrings.
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 116 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 43 | 
3 files changed, 168 insertions, 16 deletions
```diff
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f9b0ce434..62ee45a36 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Mark framebuffer surfaces as dirty
     if (color_surface != nullptr && write_color_fb) {
-        res_cache.FlushSurface(color_surface);
+        res_cache.MarkSurfaceAsDirty(color_surface);
     }
     if (depth_surface != nullptr && write_depth_fb) {
-        res_cache.FlushSurface(depth_surface);
+        res_cache.MarkSurfaceAsDirty(depth_surface);
     }
 }
 
 void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
 
-void RasterizerOpenGL::FlushAll() {}
+void RasterizerOpenGL::FlushAll() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
+}
 
-void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+}
 
-void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.InvalidateRegion(addr, size);
+}
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+    res_cache.InvalidateRegion(addr, size);
+}
 
 bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index bd35bdb02..71ad7be74 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/textures/astc.h"
@@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
     cur_state.Apply();
 }
 
-CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) {
+CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
     texture.Create();
     const auto& rect{params.GetRect()};
     AllocateSurfaceTexture(texture.handle,
@@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     draw_framebuffer.Create();
 }
 
+RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+    while (!surface_cache.empty()) {
+        UnregisterSurface(surface_cache.begin()->second);
+    }
+}
+
 Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
@@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
 }
 
-void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
-    surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
-    surface->FlushGLBuffer();
+void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
+    if (Settings::values.use_accurate_framebuffers) {
+        // If enabled, always flush dirty surfaces
+        surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+        surface->FlushGLBuffer();
+    } else {
+        // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
+        // and flushes are very slow and do not seem to improve accuracy
+        const auto& params{surface->GetSurfaceParams()};
+        Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
+    }
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
     Surface surface;
     if (search != surface_cache.end()) {
         surface = search->second;
+        if (Settings::values.use_accurate_framebuffers) {
+            // Reload the surface from Switch memory
+            LoadSurface(surface);
+        }
     } else {
         surface = std::make_shared<CachedSurface>(params);
-        surface_cache[surface_key] = surface;
+        RegisterSurface(surface);
+        LoadSurface(surface);
     }
 
-    LoadSurface(surface);
-
     return surface;
 }
 
@@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
 
     return surfaces[0];
 }
+
+void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
+    // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should
+    // probably implement this in the future, but for now, the `use_accurate_framebufers` setting
+    // can be used to always flush.
+}
+
+void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
+    for (const auto& pair : surface_cache) {
+        const auto& surface{pair.second};
+        const auto& params{surface->GetSurfaceParams()};
+
+        if (params.IsOverlappingRegion(addr, size)) {
+            UnregisterSurface(surface);
+        }
+    }
+}
+
+void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+    const auto& params{surface->GetSurfaceParams()};
+    const auto& surface_key{SurfaceKey::Create(params)};
+    const auto& search{surface_cache.find(surface_key)};
+
+    if (search != surface_cache.end()) {
+        // Registered already
+        return;
+    }
+
+    surface_cache[surface_key] = surface;
+    UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
+}
+
+void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+    const auto& params{surface->GetSurfaceParams()};
+    const auto& surface_key{SurfaceKey::Create(params)};
+    const auto& search{surface_cache.find(surface_key)};
+
+    if (search == surface_cache.end()) {
+        // Unregistered already
+        return;
+    }
+
+    UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
+    surface_cache.erase(search);
+}
+
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+    return boost::make_iterator_range(map.equal_range(interval));
+}
+
+void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+    const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
+                          (addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
+    const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
+    const u64 page_end = page_start + num_pages;
+
+    // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
+    // subtract after iterating
+    const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
+    if (delta > 0)
+        cached_pages.add({pages_interval, delta});
+
+    for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
+        const auto interval = pair.first & pages_interval;
+        const int count = pair.second;
+
+        const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
+                                                    << Tegra::MemoryManager::PAGE_BITS;
+        const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
+                                                  << Tegra::MemoryManager::PAGE_BITS;
+        const u64 interval_size = interval_end_addr - interval_start_addr;
+
+        if (delta > 0 && count == delta)
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+        else if (delta < 0 && count == -delta)
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+        else
+            ASSERT(count >= 0);
+    }
+
+    if (delta < 0)
+        cached_pages.add({pages_interval, delta});
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 84bdec652..85e7c8888 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,7 +8,7 @@
 #include <map>
 #include <memory>
 #include <vector>
-
+#include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -19,6 +19,7 @@
 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
 using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using PageMap = boost::icl::interval_map<u64, int>;
 
 struct SurfaceParams {
     enum class PixelFormat {
@@ -243,8 +244,10 @@ struct SurfaceParams {
         return SurfaceType::Invalid;
     }
 
+    /// Returns the rectangle corresponding to this surface
     MathUtil::Rectangle<u32> GetRect() const;
 
+    /// Returns the size of this surface in bytes, adjusted for compression
     size_t SizeInBytes() const {
         const u32 compression_factor{GetCompressionFactor(pixel_format)};
         ASSERT(width % compression_factor == 0);
@@ -253,10 +256,18 @@ struct SurfaceParams {
                GetFormatBpp(pixel_format) / CHAR_BIT;
     }
 
+    /// Returns the CPU virtual address for this surface
     VAddr GetCpuAddr() const;
 
+    /// Returns true if the specified region overlaps with this surface's region in Switch memory
+    bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
+        return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
+    }
+
+    /// Creates SurfaceParams from a texture configation
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
+    /// Creates SurfaceParams from a framebuffer configation
     static SurfaceParams CreateForFramebuffer(
         const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
 
@@ -272,6 +283,7 @@ struct SurfaceParams {
     size_t size_in_bytes;
 };
 
+/// Hashable variation of SurfaceParams, used for a key in the surface cache
 struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
     static SurfaceKey Create(const SurfaceParams& params) {
         SurfaceKey res;
@@ -325,18 +337,43 @@ private:
 class RasterizerCacheOpenGL final : NonCopyable {
 public:
     RasterizerCacheOpenGL();
+    ~RasterizerCacheOpenGL();
 
+    /// Get a surface based on the texture configuration
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
+
+    /// Get the color and depth surfaces based on the framebuffer configuration
     SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
                                                     const MathUtil::Rectangle<s32>& viewport);
-    void LoadSurface(const Surface& surface);
-    void FlushSurface(const Surface& surface);
+
+    /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
+    void MarkSurfaceAsDirty(const Surface& surface);
+
+    /// Tries to find a framebuffer GPU address based on the provided CPU address
     Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
 
+    /// Write any cached resources overlapping the region back to memory (if dirty)
+    void FlushRegion(Tegra::GPUVAddr addr, size_t size);
+
+    /// Mark the specified region as being invalidated
+    void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
+
 private:
+    void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params);
 
+    /// Register surface into the cache
+    void RegisterSurface(const Surface& surface);
+
+    /// Remove surface from the cache
+    void UnregisterSurface(const Surface& surface);
+
+    /// Increase/decrease the number of surface in pages touching the specified region
+    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
+
     std::unordered_map<SurfaceKey, Surface> surface_cache;
+    PageMap cached_pages;
+
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
 };
```
