diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 138 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 47 | 
4 files changed, 157 insertions, 72 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dbd106c53..3c32f1067 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -487,7 +487,12 @@ public:                      };                  } rt_control; -                INSERT_PADDING_WORDS(0x2B); +                INSERT_PADDING_WORDS(0x2); + +                u32 zeta_width; +                u32 zeta_height; + +                INSERT_PADDING_WORDS(0x27);                  u32 depth_test_enable; @@ -540,7 +545,11 @@ public:                  u32 vb_element_base; -                INSERT_PADDING_WORDS(0x49); +                INSERT_PADDING_WORDS(0x40); + +                u32 zeta_enable; + +                INSERT_PADDING_WORDS(0x8);                  struct {                      u32 tsc_address_high; @@ -865,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364);  ASSERT_REG_POSITION(zeta, 0x3F8);  ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);  ASSERT_REG_POSITION(rt_control, 0x487); +ASSERT_REG_POSITION(zeta_width, 0x48a); +ASSERT_REG_POSITION(zeta_height, 0x48b);  ASSERT_REG_POSITION(depth_test_enable, 0x4B3);  ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);  ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -874,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF);  ASSERT_REG_POSITION(stencil, 0x4E0);  ASSERT_REG_POSITION(screen_y_control, 0x4EB);  ASSERT_REG_POSITION(vb_element_base, 0x50D); +ASSERT_REG_POSITION(zeta_enable, 0x54E);  ASSERT_REG_POSITION(tsc, 0x557);  ASSERT_REG_POSITION(tic, 0x55D);  ASSERT_REG_POSITION(stencil_two_side, 0x565); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 65a2fd5e8..56d9c575b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() {      }      if (regs.clear_buffers.Z) {          clear_mask |= GL_DEPTH_BUFFER_BIT; -        use_depth_fb = true; +        use_depth_fb = regs.zeta_enable != 0;          // Always enable the depth write when clearing the depth buffer. The depth write mask is          // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. @@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() {      glClear(clear_mask);      // Mark framebuffer surfaces as dirty -    if (dirty_color_surface != nullptr) { -        res_cache.MarkSurfaceAsDirty(dirty_color_surface); -    } -    if (dirty_depth_surface != nullptr) { -        res_cache.MarkSurfaceAsDirty(dirty_depth_surface); +    if (Settings::values.use_accurate_framebuffers) { +        if (dirty_color_surface != nullptr) { +            res_cache.FlushSurface(dirty_color_surface); +        } +        if (dirty_depth_surface != nullptr) { +            res_cache.FlushSurface(dirty_depth_surface); +        }      }  } @@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() {      ScopeAcquireGLContext acquire_context;      auto [dirty_color_surface, dirty_depth_surface] = -        ConfigureFramebuffers(true, regs.zeta.Address() != 0); +        ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0);      SyncDepthTestState();      SyncBlendState(); @@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() {      state.Apply();      // Mark framebuffer surfaces as dirty -    if (dirty_color_surface != nullptr) { -        res_cache.MarkSurfaceAsDirty(dirty_color_surface); -    } -    if (dirty_depth_surface != nullptr) { -        res_cache.MarkSurfaceAsDirty(dirty_depth_surface); +    if (Settings::values.use_accurate_framebuffers) { +        if (dirty_color_surface != nullptr) { +            res_cache.FlushSurface(dirty_color_surface); +        } +        if (dirty_depth_surface != nullptr) { +            res_cache.FlushSurface(dirty_depth_surface); +        }      }  } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2c43982b0..28f0bc379 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -65,9 +65,9 @@ struct FormatTuple {      return params;  } -/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( -    const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address, -    Tegra::DepthFormat format) { +/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, +                                                             Tegra::GPUVAddr zeta_address, +                                                             Tegra::DepthFormat format) {      SurfaceParams params{};      params.addr = zeta_address; @@ -77,9 +77,9 @@ struct FormatTuple {      params.component_type = ComponentTypeFromDepthFormat(format);      params.type = GetFormatType(params.pixel_format);      params.size_in_bytes = params.SizeInBytes(); -    params.width = config.width; -    params.height = config.height; -    params.unaligned_height = config.height; +    params.width = zeta_width; +    params.height = zeta_height; +    params.unaligned_height = zeta_height;      params.size_in_bytes = params.SizeInBytes();      return params;  } @@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup      cur_state.Apply();  } +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, +                         const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, +                         GLuint read_fb_handle, GLuint draw_fb_handle) { +    OpenGLState prev_state{OpenGLState::GetCurState()}; +    SCOPE_EXIT({ prev_state.Apply(); }); + +    OpenGLState state; +    state.draw.read_framebuffer = read_fb_handle; +    state.draw.draw_framebuffer = draw_fb_handle; +    state.Apply(); + +    u32 buffers{}; + +    if (type == SurfaceType::ColorTexture) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, +                               0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, +                               0); + +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, +                               0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, +                               0); + +        buffers = GL_COLOR_BUFFER_BIT; +    } else if (type == SurfaceType::Depth) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + +        buffers = GL_DEPTH_BUFFER_BIT; +    } else if (type == SurfaceType::DepthStencil) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                               src_tex, 0); + +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                               dst_tex, 0); + +        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; +    } + +    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, +                      dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, +                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + +    return true; +} +  CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {      texture.Create();      const auto& rect{params.GetRect()}; @@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(      }      if (using_depth_fb) { -        depth_params = -            SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format); +        depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height, +                                                           regs.zeta.Address(), regs.zeta.format);      }      MathUtil::Rectangle<u32> color_rect{}; @@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {      surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);  } -void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { -    if (Settings::values.use_accurate_framebuffers) { -        // If enabled, always flush dirty surfaces -        surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); -        surface->FlushGLBuffer(); -    } else { -        // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads -        // and flushes are very slow and do not seem to improve accuracy -        const auto& params{surface->GetSurfaceParams()}; -        Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); -    } +void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { +    surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); +    surface->FlushGLBuffer();  }  Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { @@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {      if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)          return {}; -    // Check for an exact match in existing surfaces -    const auto& surface_key{SurfaceKey::Create(params)}; -    const auto& search{surface_cache.find(surface_key)}; +    // Look up surface in the cache based on address +    const auto& search{surface_cache.find(params.addr)};      Surface surface;      if (search != surface_cache.end()) {          surface = search->second;          if (Settings::values.use_accurate_framebuffers) { -            // Reload the surface from Switch memory -            LoadSurface(surface); +            // If use_accurate_framebuffers is enabled, always load from memory +            FlushSurface(surface); +            UnregisterSurface(surface); +        } else if (surface->GetSurfaceParams() != params) { +            // If surface parameters changed, recreate the surface from the old one +            return RecreateSurface(surface, params); +        } else { +            // Use the cached surface as-is +            return surface;          } -    } else { -        surface = std::make_shared<CachedSurface>(params); -        RegisterSurface(surface); -        LoadSurface(surface);      } +    // No surface found - create a new one +    surface = std::make_shared<CachedSurface>(params); +    RegisterSurface(surface); +    LoadSurface(surface); +      return surface;  } +Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, +                                               const SurfaceParams& new_params) { +    // Verify surface is compatible for blitting +    const auto& params{surface->GetSurfaceParams()}; +    ASSERT(params.type == new_params.type); +    ASSERT(params.pixel_format == new_params.pixel_format); +    ASSERT(params.component_type == new_params.component_type); + +    // Create a new surface with the new parameters, and blit the previous surface to it +    Surface new_surface{std::make_shared<CachedSurface>(new_params)}; +    BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, +                 new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle, +                 draw_framebuffer.handle); + +    // Update cache accordingly +    UnregisterSurface(surface); +    RegisterSurface(new_surface); + +    return new_surface; +} +  Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {      // Tries to find the GPU address of a framebuffer based on the CPU address. This is because      // final output framebuffers are specified by CPU address, but internally our GPU cache uses @@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size)  void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {      const auto& params{surface->GetSurfaceParams()}; -    const auto& surface_key{SurfaceKey::Create(params)}; -    const auto& search{surface_cache.find(surface_key)}; +    const auto& search{surface_cache.find(params.addr)};      if (search != surface_cache.end()) {          // Registered already          return;      } -    surface_cache[surface_key] = surface; +    surface_cache[params.addr] = surface;      UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);  }  void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {      const auto& params{surface->GetSurfaceParams()}; -    const auto& surface_key{SurfaceKey::Create(params)}; -    const auto& search{surface_cache.find(surface_key)}; +    const auto& search{surface_cache.find(params.addr)};      if (search == surface_cache.end()) {          // Unregistered already diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 718c45ce1..b084c4db4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -10,7 +10,6 @@  #include <vector>  #include <boost/icl/interval_map.hpp>  #include "common/common_types.h" -#include "common/hash.h"  #include "common/math_util.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -137,6 +136,7 @@ struct SurfaceParams {          ASSERT(static_cast<size_t>(format) < bpp_table.size());          return bpp_table[static_cast<size_t>(format)];      } +      u32 GetFormatBpp() const {          return GetFormatBpp(pixel_format);      } @@ -365,9 +365,21 @@ struct SurfaceParams {          const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);      /// Creates SurfaceParams for a depth buffer configuration -    static SurfaceParams CreateForDepthBuffer( -        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, -        Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format); +    static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, +                                              Tegra::GPUVAddr zeta_address, +                                              Tegra::DepthFormat format); + +    bool operator==(const SurfaceParams& other) const { +        return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width, +                        height, unaligned_height, size_in_bytes) == +               std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format, +                        other.component_type, other.type, other.width, other.height, +                        other.unaligned_height, other.size_in_bytes); +    } + +    bool operator!=(const SurfaceParams& other) const { +        return !operator==(other); +    }      Tegra::GPUVAddr addr;      bool is_tiled; @@ -381,24 +393,6 @@ struct SurfaceParams {      size_t size_in_bytes;  }; -/// Hashable variation of SurfaceParams, used for a key in the surface cache -struct SurfaceKey : Common::HashableStruct<SurfaceParams> { -    static SurfaceKey Create(const SurfaceParams& params) { -        SurfaceKey res; -        res.state = params; -        return res; -    } -}; - -namespace std { -template <> -struct hash<SurfaceKey> { -    size_t operator()(const SurfaceKey& k) const { -        return k.Hash(); -    } -}; -} // namespace std -  class CachedSurface final {  public:      CachedSurface(const SurfaceParams& params); @@ -444,8 +438,8 @@ public:      SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,                                                      const MathUtil::Rectangle<s32>& viewport); -    /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory -    void MarkSurfaceAsDirty(const Surface& surface); +    /// Flushes the surface to Switch memory +    void FlushSurface(const Surface& surface);      /// Tries to find a framebuffer GPU address based on the provided CPU address      Surface TryFindFramebufferSurface(VAddr cpu_addr) const; @@ -460,6 +454,9 @@ private:      void LoadSurface(const Surface& surface);      Surface GetSurface(const SurfaceParams& params); +    /// Recreates a surface with new parameters +    Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); +      /// Register surface into the cache      void RegisterSurface(const Surface& surface); @@ -469,7 +466,7 @@ private:      /// Increase/decrease the number of surface in pages touching the specified region      void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); -    std::unordered_map<SurfaceKey, Surface> surface_cache; +    std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;      PageMap cached_pages;      OGLFramebuffer read_framebuffer;  | 
