diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 69 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 286 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 111 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.h | 6 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 454 | 
6 files changed, 568 insertions, 381 deletions
| diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 027e9d293..482d0428c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -83,10 +83,10 @@ struct FramebufferCacheKey {      bool stencil_enable = false;      std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; -    std::array<CachedSurfaceView*, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; +    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{};      u32 colors_count = 0; -    CachedSurfaceView* zeta = nullptr; +    View zeta = nullptr;      auto Tie() const {          return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, @@ -115,6 +115,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind  RasterizerOpenGL::~RasterizerOpenGL() {} +void RasterizerOpenGL::InitMemoryMananger(Tegra::MemoryManager& memory_manager) { +    texture_cache.InitMemoryMananger(memory_manager); +} +  void RasterizerOpenGL::CheckExtensions() {      if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {          LOG_WARNING( @@ -474,9 +478,11 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(      }      current_framebuffer_config_state = fb_config_state; -    CachedSurfaceView* depth_surface{}; +    View depth_surface{};      if (using_depth_fb) {          depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); +    } else { +        texture_cache.SetEmptyDepthBuffer();      }      UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -489,38 +495,41 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(      if (using_color_fb) {          if (single_color_target) {              // Used when just a single color attachment is enabled, e.g. for clearing a color buffer -            CachedSurfaceView* color_surface{ +            View color_surface{                  texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)};              if (color_surface) {                  // Assume that a surface will be written to if it is used as a framebuffer, even if                  // the shader doesn't actually write to it. -                color_surface->MarkAsModified(true); +                texture_cache.MarkColorBufferInUse(*single_color_target);                  // Workaround for and issue in nvidia drivers                  // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ -                state.framebuffer_srgb.enabled |= -                    color_surface->GetSurfaceParams().GetSrgbConversion(); +                state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;              }              fbkey.is_single_buffer = true;              fbkey.color_attachments[0] =                  GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);              fbkey.colors[0] = color_surface; +            for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { +                if (index != *single_color_target) { +                    texture_cache.SetEmptyColorBuffer(index); +                } +            }          } else {              // Multiple color attachments are enabled              for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { -                CachedSurfaceView* color_surface{ -                    texture_cache.GetColorBufferSurface(index, preserve_contents)}; +                View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)};                  if (color_surface) {                      // Assume that a surface will be written to if it is used as a framebuffer, even                      // if the shader doesn't actually write to it. -                    color_surface->MarkAsModified(true); +                    texture_cache.MarkColorBufferInUse(index);                      // Enable sRGB only for supported formats                      // Workaround for and issue in nvidia drivers                      // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/                      state.framebuffer_srgb.enabled |= -                        color_surface->GetSurfaceParams().GetSrgbConversion(); +                        color_surface->GetSurfaceParams().srgb_conversion;                  }                  fbkey.color_attachments[index] = @@ -538,11 +547,11 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(      if (depth_surface) {          // Assume that a surface will be written to if it is used as a framebuffer, even if          // the shader doesn't actually write to it. -        depth_surface->MarkAsModified(true); +        texture_cache.MarkDepthBufferInUse();          fbkey.zeta = depth_surface; -        fbkey.stencil_enable = regs.stencil_enable && depth_surface->GetSurfaceParams().GetType() == -                                                          SurfaceType::DepthStencil; +        fbkey.stencil_enable = regs.stencil_enable && +                               depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;      }      SetupCachedFramebuffer(fbkey, current_state); @@ -728,11 +737,27 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {      buffer_cache.InvalidateRegion(addr, size);  } +void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { +    MICROPROFILE_SCOPE(OpenGL_CacheManagement); +    if (!addr || !size) { +        return; +    } +    texture_cache.InvalidateRegionEx(gpu_addr, size); +    shader_cache.InvalidateRegion(addr, size); +    global_cache.InvalidateRegion(addr, size); +    buffer_cache.InvalidateRegion(addr, size); +} +  void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {      FlushRegion(addr, size);      InvalidateRegion(addr, size);  } +void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { +    FlushRegion(addr, size); +    InvalidateRegionEx(gpu_addr, addr, size); +} +  bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,                                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,                                               const Common::Rectangle<u32>& src_rect, @@ -740,7 +765,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs      MICROPROFILE_SCOPE(OpenGL_Blits);      const auto src_surface{texture_cache.GetFermiSurface(src)};      const auto dst_surface{texture_cache.GetFermiSurface(dst)}; -    blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); +    // blitter.Blit(src_surface, dst_surface, src_rect, dst_rect);      return true;  } @@ -762,10 +787,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,      const auto& params{surface->GetSurfaceParams()};      const auto& pixel_format{          VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; -    ASSERT_MSG(params.GetWidth() == config.width, "Framebuffer width is different"); -    ASSERT_MSG(params.GetHeight() == config.height, "Framebuffer height is different"); +    ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); +    ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); -    if (params.GetPixelFormat() != pixel_format) { +    if (params.pixel_format != pixel_format) {          LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");      } @@ -860,10 +885,10 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s          state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); -        if (const auto surface{texture_cache.GetTextureSurface(texture)}; surface) { -            state.texture_units[current_bindpoint].texture = surface->GetTexture( -                entry.GetType(), entry.IsArray(), texture.tic.x_source, texture.tic.y_source, -                texture.tic.z_source, texture.tic.w_source); +        if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { +            view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, +                               texture.tic.w_source); +            state.texture_units[current_bindpoint].texture = view->GetTexture();          } else {              // Can occur when texture addr is null or its memory is unmapped/invalid              state.texture_units[current_bindpoint].texture = 0; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f7c2f46aa..871608f6d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -4,7 +4,9 @@  #include "common/assert.h"  #include "common/common_types.h" +#include "common/microprofile.h"  #include "common/scope_exit.h" +#include "core/core.h"  #include "video_core/morton.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  #include "video_core/renderer_opengl/gl_texture_cache.h" @@ -22,6 +24,9 @@ using VideoCore::Surface::ComponentType;  using VideoCore::Surface::PixelFormat;  using VideoCore::Surface::SurfaceTarget; +MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); +  namespace {  struct FormatTuple { @@ -129,8 +134,8 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType compon      return format;  } -GLenum GetTextureTarget(const SurfaceParams& params) { -    switch (params.GetTarget()) { +GLenum GetTextureTarget(const SurfaceTarget& target) { +    switch (target) {      case SurfaceTarget::Texture1D:          return GL_TEXTURE_1D;      case SurfaceTarget::Texture2D: @@ -175,8 +180,8 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {      glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);      glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);      glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.GetNumLevels() - 1); -    if (params.GetNumLevels() == 1) { +    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); +    if (params.num_levels == 1) {          glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);      }  } @@ -185,21 +190,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte      OGLTexture texture;      texture.Create(target); -    switch (params.GetTarget()) { +    switch (params.target) {      case SurfaceTarget::Texture1D: -        glTextureStorage1D(texture.handle, params.GetNumLevels(), internal_format, -                           params.GetWidth()); +        glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width);          break;      case SurfaceTarget::Texture2D:      case SurfaceTarget::TextureCubemap: -        glTextureStorage2D(texture.handle, params.GetNumLevels(), internal_format, -                           params.GetWidth(), params.GetHeight()); +        glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, +                           params.height);          break;      case SurfaceTarget::Texture3D:      case SurfaceTarget::Texture2DArray:      case SurfaceTarget::TextureCubeArray: -        glTextureStorage3D(texture.handle, params.GetNumLevels(), internal_format, -                           params.GetWidth(), params.GetHeight(), params.GetDepth()); +        glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, +                           params.height, params.depth);          break;      default:          UNREACHABLE(); @@ -212,54 +216,72 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte  } // Anonymous namespace -CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) -    : VideoCommon::SurfaceBase<TextureCacheOpenGL, CachedSurfaceView>{texture_cache, params} { -    const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; +CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) +    : VideoCommon::SurfaceBase<View>(gpu_addr, params) { +    const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};      internal_format = tuple.internal_format;      format = tuple.format;      type = tuple.type;      is_compressed = tuple.compressed; -    target = GetTextureTarget(params); +    target = GetTextureTarget(params.target);      texture = CreateTexture(params, target, internal_format); +    DecorateSurfaceName(); +    ViewParams main{}; +    main.num_levels = params.num_levels; +    main.base_level = 0; +    main.base_layer = 0; +    main.num_layers = params.is_layered ? params.depth : 1; +    main.target = params.target; +    main_view = CreateView(main); +    main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); +} + +CachedSurface::~CachedSurface() { +    views.clear(); +    main_view = nullptr;  } -CachedSurface::~CachedSurface() = default; +void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { +    LOG_CRITICAL(Render_OpenGL, "Flushing"); +    MICROPROFILE_SCOPE(OpenGL_Texture_Download); -void CachedSurface::DownloadTexture() {      // TODO(Rodrigo): Optimize alignment      glPixelStorei(GL_PACK_ALIGNMENT, 1);      SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); -    for (u32 level = 0; level < params.GetNumLevels(); ++level) { +    for (u32 level = 0; level < params.num_levels; ++level) {          glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); +        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);          if (is_compressed) {              glGetCompressedTextureImage(texture.handle, level,                                          static_cast<GLsizei>(params.GetHostMipmapSize(level)), -                                        GetStagingBufferLevelData(level)); +                                        staging_buffer.data() + mip_offset);          } else {              glGetTextureImage(texture.handle, level, format, type,                                static_cast<GLsizei>(params.GetHostMipmapSize(level)), -                              GetStagingBufferLevelData(level)); +                              staging_buffer.data() + mip_offset);          }      }  } -void CachedSurface::UploadTexture() { +void CachedSurface::UploadTexture(std::vector<u8>& staging_buffer) { +    MICROPROFILE_SCOPE(OpenGL_Texture_Upload);      SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); -    for (u32 level = 0; level < params.GetNumLevels(); ++level) { -        UploadTextureMipmap(level); +    for (u32 level = 0; level < params.num_levels; ++level) { +        UploadTextureMipmap(level, staging_buffer);      }  } -void CachedSurface::UploadTextureMipmap(u32 level) { +void CachedSurface::UploadTextureMipmap(u32 level, std::vector<u8>& staging_buffer) {      // TODO(Rodrigo): Optimize alignment      glPixelStorei(GL_UNPACK_ALIGNMENT, 1);      glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); -    u8* buffer{GetStagingBufferLevelData(level)}; +    const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); +    u8* buffer{staging_buffer.data() + mip_offset};      if (is_compressed) {          const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; -        switch (params.GetTarget()) { +        switch (params.target) {          case SurfaceTarget::Texture2D:              glCompressedTextureSubImage2D(texture.handle, level, 0, 0,                                            static_cast<GLsizei>(params.GetMipWidth(level)), @@ -277,7 +299,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) {              break;          case SurfaceTarget::TextureCubemap: {              const std::size_t layer_size{params.GetHostLayerSize(level)}; -            for (std::size_t face = 0; face < params.GetDepth(); ++face) { +            for (std::size_t face = 0; face < params.depth; ++face) {                  glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),                                                static_cast<GLsizei>(params.GetMipWidth(level)),                                                static_cast<GLsizei>(params.GetMipHeight(level)), 1, @@ -291,7 +313,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) {              UNREACHABLE();          }      } else { -        switch (params.GetTarget()) { +        switch (params.target) {          case SurfaceTarget::Texture1D:              glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,                                  buffer); @@ -310,7 +332,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) {                  static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);              break;          case SurfaceTarget::TextureCubemap: -            for (std::size_t face = 0; face < params.GetDepth(); ++face) { +            for (std::size_t face = 0; face < params.depth; ++face) {                  glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),                                      params.GetMipWidth(level), params.GetMipHeight(level), 1,                                      format, type, buffer); @@ -324,61 +346,57 @@ void CachedSurface::UploadTextureMipmap(u32 level) {  }  void CachedSurface::DecorateSurfaceName() { -    LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), -                  params.GetTarget() == SurfaceTarget::Texture3D ? "3D" : ""); +    LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); +} + +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { +    LabelGLObject(GL_TEXTURE, texture_view.texture.handle, gpu_addr, prefix);  } -std::unique_ptr<CachedSurfaceView> CachedSurface::CreateView(const ViewKey& view_key) { -    return std::make_unique<CachedSurfaceView>(*this, view_key); +View CachedSurface::CreateView(const ViewParams& view_key) { +    auto view = std::make_shared<CachedSurfaceView>(*this, view_key); +    views[view_key] = view; +    view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); +    return view;  } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, ViewKey key) -    : surface{surface}, key{key}, params{surface.GetSurfaceParams()} {} +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params) +    : VideoCommon::ViewBase(params), surface{surface} { +    target = GetTextureTarget(params.target); +    texture_view = CreateTextureView(); +}  CachedSurfaceView::~CachedSurfaceView() = default;  void CachedSurfaceView::Attach(GLenum attachment) const { -    ASSERT(key.num_layers == 1 && key.num_levels == 1); +    ASSERT(params.num_layers == 1 && params.num_levels == 1); -    switch (params.GetTarget()) { +    switch (params.target) {      case SurfaceTarget::Texture1D: -        glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), -                               surface.GetTexture(), key.base_level); +        glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, target, +                               surface.GetTexture(), params.base_level);          break;      case SurfaceTarget::Texture2D: -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), -                               surface.GetTexture(), key.base_level); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, target, +                               surface.GetTexture(), params.base_level);          break;      case SurfaceTarget::Texture1DArray:      case SurfaceTarget::Texture2DArray:      case SurfaceTarget::TextureCubemap:      case SurfaceTarget::TextureCubeArray: -        glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), -                                  key.base_level, key.base_layer); +        glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, target, +                                  params.base_level, params.base_layer);          break;      default:          UNIMPLEMENTED();      }  } -GLuint CachedSurfaceView::GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, -                                     SwizzleSource x_source, SwizzleSource y_source, +void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,                                       SwizzleSource z_source, SwizzleSource w_source) { -    const auto [texture_view, target] = GetTextureView(texture_type, is_array); -    if (texture_view.get().texture.handle == 0) { -        texture_view.get() = std::move(CreateTextureView(target)); -    } -    ApplySwizzle(texture_view, x_source, y_source, z_source, w_source); -    return texture_view.get().texture.handle; -} - -void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_source, -                                     SwizzleSource y_source, SwizzleSource z_source, -                                     SwizzleSource w_source) { -    const std::array<SwizzleSource, 4> swizzle = {x_source, y_source, z_source, w_source}; -    if (swizzle == texture_view.swizzle) { +    u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); +    if (swizzle == texture_view.swizzle)          return; -    }      const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),                                               GetSwizzleSource(z_source),                                               GetSwizzleSource(w_source)}; @@ -386,38 +404,25 @@ void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_      texture_view.swizzle = swizzle;  } -CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView(GLenum target) const { +CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { +    const auto& owner_params = surface.GetSurfaceParams();      TextureView texture_view; -    glGenTextures(1, &texture_view.texture.handle); +    texture_view.texture.Create();      const GLuint handle{texture_view.texture.handle}; -    const FormatTuple& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; +    const FormatTuple& tuple{ +        GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; -    glTextureView(handle, target, surface.texture.handle, tuple.internal_format, key.base_level, -                  key.num_levels, key.base_layer, key.num_layers); -    ApplyTextureDefaults(params, handle); +    glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, +                  params.num_levels, params.base_layer, params.num_layers); -    return texture_view; -} +    ApplyTextureDefaults(owner_params, handle); -std::pair<std::reference_wrapper<CachedSurfaceView::TextureView>, GLenum> -CachedSurfaceView::GetTextureView(Tegra::Shader::TextureType texture_type, bool is_array) { -    using Pair = std::pair<std::reference_wrapper<TextureView>, GLenum>; -    switch (texture_type) { -    case Tegra::Shader::TextureType::Texture1D: -        return is_array ? Pair{texture_view_1d_array, GL_TEXTURE_1D_ARRAY} -                        : Pair{texture_view_1d, GL_TEXTURE_1D}; -    case Tegra::Shader::TextureType::Texture2D: -        return is_array ? Pair{texture_view_2d_array, GL_TEXTURE_2D_ARRAY} -                        : Pair{texture_view_2d, GL_TEXTURE_2D}; -    case Tegra::Shader::TextureType::Texture3D: -        ASSERT(!is_array); -        return {texture_view_3d, GL_TEXTURE_3D}; -    case Tegra::Shader::TextureType::TextureCube: -        return is_array ? Pair{texture_view_cube_array, GL_TEXTURE_CUBE_MAP_ARRAY} -                        : Pair{texture_view_cube, GL_TEXTURE_CUBE_MAP}; -    } -    UNREACHABLE(); +    u32 swizzle = +        EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); +    texture_view.swizzle = swizzle; + +    return texture_view;  }  TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, @@ -426,106 +431,21 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,  TextureCacheOpenGL::~TextureCacheOpenGL() = default; -CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, -                                                             u8* host_ptr, -                                                             const SurfaceParams& new_params, -                                                             bool preserve_contents, -                                                             const std::vector<Surface>& overlaps) { -    if (overlaps.size() > 1) { -        return TryCopyAsViews(gpu_addr, cpu_addr, host_ptr, new_params, overlaps); -    } - -    const auto& old_surface{overlaps[0]}; -    const auto& old_params{old_surface->GetSurfaceParams()}; -    if (old_params.GetTarget() == new_params.GetTarget() && -        old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && -        old_params.GetNumLevels() == new_params.GetNumLevels() && -        old_params.GetPixelFormat() == new_params.GetPixelFormat()) { -        return SurfaceCopy(gpu_addr, cpu_addr, host_ptr, new_params, old_surface, old_params); -    } - -    return nullptr; -} - -CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, -                                                   const SurfaceParams& new_params, -                                                   const Surface& old_surface, -                                                   const SurfaceParams& old_params) { -    const auto new_surface{GetUncachedSurface(new_params)}; -    Register(new_surface, gpu_addr, cpu_addr, host_ptr); - -    const u32 min_width{ -        std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; -    const u32 min_height{ -        std::max(old_params.GetDefaultBlockHeight(), new_params.GetDefaultBlockHeight())}; -    for (u32 level = 0; level < old_params.GetNumLevels(); ++level) { -        const u32 width{std::min(old_params.GetMipWidth(level), new_params.GetMipWidth(level))}; -        const u32 height{std::min(old_params.GetMipHeight(level), new_params.GetMipHeight(level))}; -        if (width < min_width || height < min_height) { -            // Avoid copies that are too small to be handled in OpenGL -            break; -        } -        glCopyImageSubData(old_surface->GetTexture(), old_surface->GetTarget(), level, 0, 0, 0, -                           new_surface->GetTexture(), new_surface->GetTarget(), level, 0, 0, 0, -                           width, height, 1); -    } - -    new_surface->MarkAsModified(true); - -    // TODO(Rodrigo): Add an entry to directly get the superview -    return new_surface->GetView(gpu_addr, new_params); -} - -CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, -                                                      u8* host_ptr, const SurfaceParams& new_params, -                                                      const std::vector<Surface>& overlaps) { -    if (new_params.GetTarget() == SurfaceTarget::Texture1D || -        new_params.GetTarget() == SurfaceTarget::Texture1DArray || -        new_params.GetTarget() == SurfaceTarget::Texture3D) { -        // Non-2D textures are not handled at the moment in this fast path. -        return nullptr; -    } - -    const auto new_surface{GetUncachedSurface(new_params)}; -    // TODO(Rodrigo): Move this down -    Register(new_surface, gpu_addr, cpu_addr, host_ptr); - -    // TODO(Rodrigo): Find a way to avoid heap allocations here. -    std::vector<CachedSurfaceView*> views; -    views.reserve(overlaps.size()); -    for (const auto& overlap : overlaps) { -        const auto view{ -            new_surface->TryGetView(overlap->GetGpuAddr(), overlap->GetSurfaceParams())}; -        if (!view) { -            // TODO(Rodrigo): Remove this -            Unregister(new_surface); -            return nullptr; -        } -        views.push_back(view); -    } - -    // TODO(Rodrigo): It's possible that these method leaves some unloaded textures if the data has -    // been uploaded to guest memory but not used as a surface previously. -    for (std::size_t i = 0; i < overlaps.size(); ++i) { -        const auto& overlap{overlaps[i]}; -        const auto& view{views[i]}; -        for (u32 overlap_level = 0; overlap_level < view->GetNumLevels(); ++overlap_level) { -            const u32 super_level{view->GetBaseLevel() + overlap_level}; -            glCopyImageSubData(overlap->GetTexture(), overlap->GetTarget(), overlap_level, 0, 0, 0, -                               new_surface->GetTexture(), new_surface->GetTarget(), super_level, 0, -                               0, view->GetBaseLayer(), view->GetWidth(), view->GetHeight(), -                               view->GetNumLayers()); -        } -    } - -    new_surface->MarkAsModified(true); - -    // TODO(Rodrigo): Add an entry to directly get the superview -    return new_surface->GetView(gpu_addr, new_params); +Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { +    return std::make_shared<CachedSurface>(gpu_addr, params);  } -Surface TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { -    return std::make_unique<CachedSurface>(*this, params); +void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, +                                   const VideoCommon::CopyParams& copy_params) { +    const auto src_handle = src_surface->GetTexture(); +    const auto src_target = src_surface->GetTarget(); +    const auto dst_handle = dst_surface->GetTexture(); +    const auto dst_target = dst_surface->GetTarget(); +    glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, +                       copy_params.source_y, copy_params.source_z, dst_handle, dst_target, +                       copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, +                       copy_params.dest_z, copy_params.width, copy_params.height, +                       copy_params.depth);  }  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c65e37153..1722c1bbc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -19,24 +19,25 @@  namespace OpenGL {  using VideoCommon::SurfaceParams; -using VideoCommon::ViewKey; +using VideoCommon::ViewParams;  class CachedSurfaceView;  class CachedSurface;  class TextureCacheOpenGL;  using Surface = std::shared_ptr<CachedSurface>; -using TextureCacheBase = VideoCommon::TextureCache<CachedSurface, CachedSurfaceView>; +using View = std::shared_ptr<CachedSurfaceView>; +using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; -class CachedSurface final : public VideoCommon::SurfaceBase<TextureCacheOpenGL, CachedSurfaceView> { +class CachedSurface final : public VideoCommon::SurfaceBase<View> {      friend CachedSurfaceView;  public: -    explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); +    explicit CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params);      ~CachedSurface(); -    void UploadTexture(); -    void DownloadTexture(); +    void UploadTexture(std::vector<u8>& staging_buffer) override; +    void DownloadTexture(std::vector<u8>& staging_buffer) override;      GLenum GetTarget() const {          return target; @@ -49,99 +50,79 @@ public:  protected:      void DecorateSurfaceName(); -    std::unique_ptr<CachedSurfaceView> CreateView(const ViewKey& view_key); +    View CreateView(const ViewParams& view_key) override;  private: -    void UploadTextureMipmap(u32 level); +    void UploadTextureMipmap(u32 level, std::vector<u8>& staging_buffer);      GLenum internal_format{};      GLenum format{};      GLenum type{};      bool is_compressed{};      GLenum target{}; +    u32 view_count{};      OGLTexture texture;  }; -class CachedSurfaceView final { +class CachedSurfaceView final : public VideoCommon::ViewBase {  public: -    explicit CachedSurfaceView(CachedSurface& surface, ViewKey key); +    explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params);      ~CachedSurfaceView();      /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER      void Attach(GLenum attachment) const; -    GLuint GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, -                      Tegra::Texture::SwizzleSource x_source, -                      Tegra::Texture::SwizzleSource y_source, -                      Tegra::Texture::SwizzleSource z_source, -                      Tegra::Texture::SwizzleSource w_source); - -    void MarkAsModified(bool is_modified) { -        surface.MarkAsModified(is_modified); +    GLuint GetTexture() { +        return texture_view.texture.handle;      }      const SurfaceParams& GetSurfaceParams() const { -        return params; +        return surface.GetSurfaceParams();      }      u32 GetWidth() const { -        return params.GetMipWidth(GetBaseLevel()); +        const auto owner_params = GetSurfaceParams(); +        return owner_params.GetMipWidth(params.base_level);      }      u32 GetHeight() const { -        return params.GetMipHeight(GetBaseLevel()); +        const auto owner_params = GetSurfaceParams(); +        return owner_params.GetMipHeight(params.base_level);      }      u32 GetDepth() const { -        return params.GetMipDepth(GetBaseLevel()); -    } - -    u32 GetBaseLayer() const { -        return key.base_layer; +        const auto owner_params = GetSurfaceParams(); +        return owner_params.GetMipDepth(params.base_level);      } -    u32 GetNumLayers() const { -        return key.num_layers; -    } - -    u32 GetBaseLevel() const { -        return key.base_level; -    } +    void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, +                      Tegra::Texture::SwizzleSource y_source, +                      Tegra::Texture::SwizzleSource z_source, +                      Tegra::Texture::SwizzleSource w_source); -    u32 GetNumLevels() const { -        return key.num_levels; -    } +    void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);  private:      struct TextureView { -        OGLTexture texture; -        std::array<Tegra::Texture::SwizzleSource, 4> swizzle{ -            Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, -            Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A}; +        OGLTextureView texture; +        u32 swizzle;      }; -    void ApplySwizzle(TextureView& texture_view, Tegra::Texture::SwizzleSource x_source, +    u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,                        Tegra::Texture::SwizzleSource y_source,                        Tegra::Texture::SwizzleSource z_source, -                      Tegra::Texture::SwizzleSource w_source); - -    TextureView CreateTextureView(GLenum target) const; +                      Tegra::Texture::SwizzleSource w_source) const { +        return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | +               (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); +    } -    std::pair<std::reference_wrapper<TextureView>, GLenum> GetTextureView( -        Tegra::Shader::TextureType texture_type, bool is_array); +    TextureView CreateTextureView() const;      CachedSurface& surface; -    const ViewKey key; -    const SurfaceParams params; - -    TextureView texture_view_1d; -    TextureView texture_view_1d_array; -    TextureView texture_view_2d; -    TextureView texture_view_2d_array; -    TextureView texture_view_3d; -    TextureView texture_view_cube; -    TextureView texture_view_cube_array; +    GLenum target{}; + +    TextureView texture_view;  };  class TextureCacheOpenGL final : public TextureCacheBase { @@ -150,21 +131,9 @@ public:      ~TextureCacheOpenGL();  protected: -    CachedSurfaceView* TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, -                                             const SurfaceParams& new_params, -                                             bool preserve_contents, -                                             const std::vector<Surface>& overlaps); - -    Surface CreateSurface(const SurfaceParams& params); - -private: -    CachedSurfaceView* SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, -                                   const SurfaceParams& new_params, const Surface& old_surface, -                                   const SurfaceParams& old_params); - -    CachedSurfaceView* TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, -                                      const SurfaceParams& new_params, -                                      const std::vector<Surface>& overlaps); +    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; +    void ImageCopy(Surface src_surface, Surface dst_surface, +                   const VideoCommon::CopyParams& copy_params) override;  };  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 5994c0c61..a9fa539a5 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -56,8 +56,7 @@ SurfaceBlitter::SurfaceBlitter() {  SurfaceBlitter::~SurfaceBlitter() = default; -void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, -                          const Common::Rectangle<u32>& src_rect, +void SurfaceBlitter::Blit(View src, View dst, const Common::Rectangle<u32>& src_rect,                            const Common::Rectangle<u32>& dst_rect) const {      const auto& src_params{src->GetSurfaceParams()};      const auto& dst_params{dst->GetSurfaceParams()}; @@ -72,17 +71,13 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst,      u32 buffers{}; -    UNIMPLEMENTED_IF(src_params.GetTarget() != SurfaceTarget::Texture2D); -    UNIMPLEMENTED_IF(dst_params.GetTarget() != SurfaceTarget::Texture2D); +    UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); +    UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); -    const auto GetTexture = [](CachedSurfaceView* view) { -        return view->GetTexture(TextureType::Texture2D, false, SwizzleSource::R, SwizzleSource::G, -                                SwizzleSource::B, SwizzleSource::A); -    }; -    const GLuint src_texture{GetTexture(src)}; -    const GLuint dst_texture{GetTexture(dst)}; +    const GLuint src_texture{src->GetTexture()}; +    const GLuint dst_texture{dst->GetTexture()}; -    if (src_params.GetType() == SurfaceType::ColorTexture) { +    if (src_params.type == SurfaceType::ColorTexture) {          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,                                 src_texture, 0);          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -94,7 +89,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst,                                 0);          buffers = GL_COLOR_BUFFER_BIT; -    } else if (src_params.GetType() == SurfaceType::Depth) { +    } else if (src_params.type == SurfaceType::Depth) {          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture,                                 0); @@ -106,7 +101,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst,          glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);          buffers = GL_DEPTH_BUFFER_BIT; -    } else if (src_params.GetType() == SurfaceType::DepthStencil) { +    } else if (src_params.type == SurfaceType::DepthStencil) {          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,                                 src_texture, 0); @@ -148,4 +143,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie      glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));  } -} // namespace OpenGL
\ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index e7726d14e..8977d2383 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -39,8 +39,8 @@ public:      explicit SurfaceBlitter();      ~SurfaceBlitter(); -    void Blit(CachedSurfaceView* src, CachedSurfaceView* dst, -              const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) const; +    void Blit(View src, View dst, const Common::Rectangle<u32>& src_rect, +              const Common::Rectangle<u32>& dst_rect) const;  private:      OGLFramebuffer src_framebuffer; @@ -49,4 +49,4 @@ private:  void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); -} // namespace OpenGL
\ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c5c01957a..eb0d9bc10 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@  #include "video_core/memory_manager.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/surface.h" +#include "video_core/texture_cache/copy_params.h"  #include "video_core/texture_cache/surface_base.h"  #include "video_core/texture_cache/surface_params.h"  #include "video_core/texture_cache/surface_view.h" @@ -40,32 +41,42 @@ class RasterizerInterface;  namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; +  template <typename TSurface, typename TView>  class TextureCache { -    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<std::shared_ptr<TSurface>>>; +    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;      using IntervalType = typename IntervalMap::interval_type;  public: +    void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { +        this->memory_manager = &memory_manager; +    } +      void InvalidateRegion(CacheAddr addr, std::size_t size) {          for (const auto& surface : GetSurfacesInRegion(addr, size)) { -            if (!surface->IsRegistered()) { -                // Skip duplicates -                continue; -            }              Unregister(surface);          }      } -    TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { +    void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { +        for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { +            Unregister(surface); +        } +    } + +    TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, +                            const VideoCommon::Shader::Sampler& entry) {          const auto gpu_addr{config.tic.Address()};          if (!gpu_addr) {              return {};          } -        const auto params{SurfaceParams::CreateForTexture(system, config)}; -        return GetSurfaceView(gpu_addr, params, true); +        const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; +        return GetSurface(gpu_addr, params, true).second;      } -    TView* GetDepthBufferSurface(bool preserve_contents) { +    TView GetDepthBufferSurface(bool preserve_contents) {          const auto& regs{system.GPU().Maxwell3D().regs};          const auto gpu_addr{regs.zeta.Address()};          if (!gpu_addr || !regs.zeta_enable) { @@ -75,36 +86,75 @@ public:              system, regs.zeta_width, regs.zeta_height, regs.zeta.format,              regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,              regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; -        return GetSurfaceView(gpu_addr, depth_params, preserve_contents); +        auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); +        if (depth_buffer.target) +            depth_buffer.target->MarkAsProtected(false); +        if (depth_buffer.target) +            depth_buffer.target->MarkAsProtected(true); +        return surface_view.second;      } -    TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { +    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {          ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);          const auto& regs{system.GPU().Maxwell3D().regs};          if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||              regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { +            SetEmptyColorBuffer(index);              return {};          } -        auto& memory_manager{system.GPU().MemoryManager()}; -        const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; -        const auto gpu_addr{config.Address() + -                            config.base_layer * config.layer_stride * sizeof(u32)}; +        const auto& config{regs.rt[index]}; +        const auto gpu_addr{config.Address()};          if (!gpu_addr) { +            SetEmptyColorBuffer(index);              return {};          } -        return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), -                              preserve_contents); +        auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), +                                       preserve_contents); +        if (render_targets[index].target) +            render_targets[index].target->MarkAsProtected(false); +        render_targets[index].target = surface_view.first; +        if (render_targets[index].target) +            render_targets[index].target->MarkAsProtected(true); +        return surface_view.second; +    } + +    void MarkColorBufferInUse(std::size_t index) { +        if (render_targets[index].target) +            render_targets[index].target->MarkAsModified(true, Tick());      } -    TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { -        return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), -                              true); +    void MarkDepthBufferInUse() { +        if (depth_buffer.target) +            depth_buffer.target->MarkAsModified(true, Tick());      } -    std::shared_ptr<TSurface> TryFindFramebufferSurface(const u8* host_ptr) const { +    void SetEmptyDepthBuffer() { +        if (depth_buffer.target != nullptr) { +            depth_buffer.target->MarkAsProtected(false); +            depth_buffer.target = nullptr; +            depth_buffer.view = nullptr; +        } +    } + +    void SetEmptyColorBuffer(std::size_t index) { +        if (render_targets[index].target != nullptr) { +            render_targets[index].target->MarkAsProtected(false); +            std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); +            render_targets[index].target = nullptr; +            render_targets[index].view = nullptr; +        } +    } + +    TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { +        SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); +        const GPUVAddr gpu_addr = config.Address(); +        return GetSurface(gpu_addr, params, true).second; +    } + +    TSurface TryFindFramebufferSurface(const u8* host_ptr) const {          const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};          return it != registered_surfaces.end() ? *it->second.begin() : nullptr;      } @@ -115,126 +165,334 @@ public:  protected:      TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) -        : system{system}, rasterizer{rasterizer} {} +        : system{system}, rasterizer{rasterizer} { +        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { +            SetEmptyColorBuffer(i); +        } +        SetEmptyDepthBuffer(); +    }      ~TextureCache() = default; -    virtual TView* TryFastGetSurfaceView( -        GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, -        bool preserve_contents, const std::vector<std::shared_ptr<TSurface>>& overlaps) = 0; +    virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; -    virtual std::shared_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0; +    virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, +                           const CopyParams& copy_params) = 0; -    void Register(std::shared_ptr<TSurface> surface, GPUVAddr gpu_addr, VAddr cpu_addr, -                  u8* host_ptr) { -        surface->Register(gpu_addr, cpu_addr, host_ptr); -        registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); -        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); +    void Register(TSurface surface) { +        const GPUVAddr gpu_addr = surface->GetGpuAddr(); +        u8* host_ptr = memory_manager->GetPointer(gpu_addr); +        const std::size_t size = surface->GetSizeInBytes(); +        const std::optional<VAddr> cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); +        if (!host_ptr || !cpu_addr) { +            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", +                         gpu_addr); +            return; +        } +        surface->SetHostPtr(host_ptr); +        surface->SetCpuAddr(*cpu_addr); +        registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); +        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); +        RegisterInnerCache(surface); +        surface->MarkAsRegistered(true);      } -    void Unregister(std::shared_ptr<TSurface> surface) { -        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); -        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); -        surface->Unregister(); +    void Unregister(TSurface surface) { +        if (surface->IsProtected()) +            return; +        const GPUVAddr gpu_addr = surface->GetGpuAddr(); +        const void* host_ptr = surface->GetHostPtr(); +        const std::size_t size = surface->GetSizeInBytes(); +        const VAddr cpu_addr = surface->GetCpuAddr(); +        registered_surfaces.erase(GetInterval(host_ptr, size)); +        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); +        UnregisterInnerCache(surface); +        surface->MarkAsRegistered(false); +        ReserveSurface(surface->GetSurfaceParams(), surface);      } -    std::shared_ptr<TSurface> GetUncachedSurface(const SurfaceParams& params) { -        if (const auto surface = TryGetReservedSurface(params); surface) +    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { +        if (const auto surface = TryGetReservedSurface(params); surface) { +            surface->SetGpuAddr(gpu_addr);              return surface; +        }          // No reserved surface available, create a new one and reserve it -        auto new_surface{CreateSurface(params)}; -        ReserveSurface(params, new_surface); +        auto new_surface{CreateSurface(gpu_addr, params)};          return new_surface;      }      Core::System& system;  private: -    TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { -        auto& memory_manager{system.GPU().MemoryManager()}; -        const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; -        DEBUG_ASSERT(cpu_addr); - -        const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; -        const auto cache_addr{ToCacheAddr(host_ptr)}; -        auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; -        if (overlaps.empty()) { -            return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); +    enum class RecycleStrategy : u32 { +        Ignore = 0, +        Flush = 1, +        BufferCopy = 3, +    }; + +    RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, +                                 const GPUVAddr gpu_addr, const bool untopological) { +        // Untopological decision +        if (untopological) { +            return RecycleStrategy::Ignore; +        } +        // 3D Textures decision +        if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { +            return RecycleStrategy::Flush;          } +        for (auto s : overlaps) { +            const auto& s_params = s->GetSurfaceParams(); +            if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { +                return RecycleStrategy::Flush; +            } +        } +        return RecycleStrategy::Ignore; +    } -        if (overlaps.size() == 1) { -            if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { -                return view; +    std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, +                                              const SurfaceParams& params, const GPUVAddr gpu_addr, +                                              const u8* host_ptr, const bool preserve_contents, +                                              const bool untopological) { +        for (auto surface : overlaps) { +            Unregister(surface); +        } +        RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation +                                       ? PickStrategy(overlaps, params, gpu_addr, untopological) +                                       : RecycleStrategy::Flush; +        switch (strategy) { +        case RecycleStrategy::Ignore: { +            return InitializeSurface(gpu_addr, params, preserve_contents); +        } +        case RecycleStrategy::Flush: { +            std::sort(overlaps.begin(), overlaps.end(), +                      [](const TSurface& a, const TSurface& b) -> bool { +                          return a->GetModificationTick() < b->GetModificationTick(); +                      }); +            for (auto surface : overlaps) { +                FlushSurface(surface);              } +            return InitializeSurface(gpu_addr, params, preserve_contents);          } +        default: { +            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); +            return InitializeSurface(gpu_addr, params, preserve_contents); +        } +        } +    } -        const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, -                                                   preserve_contents, overlaps)}; +    std::pair<TSurface, TView> RebuildMirage(TSurface current_surface, +                                             const SurfaceParams& params) { +        const auto gpu_addr = current_surface->GetGpuAddr(); +        TSurface new_surface = GetUncachedSurface(gpu_addr, params); +        std::vector<CopyParams> bricks = current_surface->BreakDown(); +        for (auto& brick : bricks) { +            ImageCopy(current_surface, new_surface, brick); +        } +        Unregister(current_surface); +        Register(new_surface); +        return {new_surface, new_surface->GetMainView()}; +    } -        if (!fast_view) { -            std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { -                return lhs->GetModificationTick() < rhs->GetModificationTick(); -            }); +    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, +                                                     const SurfaceParams& params) { +        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); +        if (is_mirage) { +            return RebuildMirage(current_surface, params);          } +        const bool matches_target = current_surface->MatchTarget(params.target); +        if (matches_target) { +            return {current_surface, current_surface->GetMainView()}; +        } +        return {current_surface, current_surface->EmplaceOverview(params)}; +    } -        for (const auto& surface : overlaps) { -            if (!fast_view) { -                // Flush even when we don't care about the contents, to preserve memory not -                // written by the new surface. -                FlushSurface(surface); +    std::optional<std::pair<TSurface, TView>> ReconstructSurface(std::vector<TSurface>& overlaps, +                                                                 const SurfaceParams& params, +                                                                 const GPUVAddr gpu_addr, +                                                                 const u8* host_ptr) { +        if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { +            return {}; +        } +        TSurface new_surface = GetUncachedSurface(gpu_addr, params); +        for (auto surface : overlaps) { +            const SurfaceParams& src_params = surface->GetSurfaceParams(); +            if (src_params.is_layered || src_params.num_levels > 1) { +                // We send this cases to recycle as they are more complex to handle +                return {}; +            } +            const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); +            auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); +            if (!mipmap_layer) { +                return {};              } +            const u32 layer = (*mipmap_layer).first; +            const u32 mipmap = (*mipmap_layer).second; +            if (new_surface->GetMipmapSize(mipmap) != candidate_size) { +                return {}; +            } +            // Now we got all the data set up +            CopyParams copy_params{}; +            const u32 dst_width = params.GetMipWidth(mipmap); +            const u32 dst_height = params.GetMipHeight(mipmap); +            copy_params.width = std::min(src_params.width, dst_width); +            copy_params.height = std::min(src_params.height, dst_height); +            copy_params.depth = 1; +            copy_params.source_level = 0; +            copy_params.dest_level = mipmap; +            copy_params.source_z = 0; +            copy_params.dest_z = layer; +            ImageCopy(surface, new_surface, copy_params); +        } +        for (auto surface : overlaps) {              Unregister(surface);          } -        if (fast_view) { -            return fast_view; +        Register(new_surface); +        return {{new_surface, new_surface->GetMainView()}}; +    } + +    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, +                                          bool preserve_contents) { + +        const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; +        const auto cache_addr{ToCacheAddr(host_ptr)}; +        const std::size_t candidate_size = params.GetGuestSizeInBytes(); +        auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; +        if (overlaps.empty()) { +            return InitializeSurface(gpu_addr, params, preserve_contents); +        } + +        for (auto surface : overlaps) { +            if (!surface->MatchesTopology(params)) { +                return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, +                                      true); +            }          } -        return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); +        if (overlaps.size() == 1) { +            TSurface current_surface = overlaps[0]; +            if (current_surface->MatchesStructure(params) && +                current_surface->GetGpuAddr() == gpu_addr && +                (params.target != SurfaceTarget::Texture3D || +                 current_surface->MatchTarget(params.target))) { +                return ManageStructuralMatch(current_surface, params); +            } +            if (current_surface->GetSizeInBytes() <= candidate_size) { +                return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, +                                      false); +            } +            std::optional<TView> view = current_surface->EmplaceView(params, gpu_addr); +            if (view.has_value()) { +                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); +                if (is_mirage) { +                    LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); +                    return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, +                                          false); +                } +                return {current_surface, *view}; +            } +            return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); +        } else { +            std::optional<std::pair<TSurface, TView>> view = +                ReconstructSurface(overlaps, params, gpu_addr, host_ptr); +            if (view.has_value()) { +                return *view; +            } +            return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); +        }      } -    TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, -                           const SurfaceParams& params, bool preserve_contents) { -        const auto new_surface{GetUncachedSurface(params)}; -        Register(new_surface, gpu_addr, cpu_addr, host_ptr); +    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, +                                                 bool preserve_contents) { +        auto new_surface{GetUncachedSurface(gpu_addr, params)}; +        Register(new_surface);          if (preserve_contents) {              LoadSurface(new_surface);          } -        return new_surface->GetView(gpu_addr, params); +        return {new_surface, new_surface->GetMainView()};      } -    void LoadSurface(const std::shared_ptr<TSurface>& surface) { -        surface->LoadBuffer(); -        surface->UploadTexture(); -        surface->MarkAsModified(false); +    void LoadSurface(const TSurface& surface) { +        staging_buffer.resize(surface->GetHostSizeInBytes()); +        surface->LoadBuffer(*memory_manager, staging_buffer); +        surface->UploadTexture(staging_buffer); +        surface->MarkAsModified(false, Tick());      } -    void FlushSurface(const std::shared_ptr<TSurface>& surface) { +    void FlushSurface(const TSurface& surface) {          if (!surface->IsModified()) {              return;          } -        surface->DownloadTexture(); -        surface->FlushBuffer(); +        staging_buffer.resize(surface->GetHostSizeInBytes()); +        surface->DownloadTexture(staging_buffer); +        surface->FlushBuffer(staging_buffer); +        surface->MarkAsModified(false, Tick());      } -    std::vector<std::shared_ptr<TSurface>> GetSurfacesInRegion(CacheAddr cache_addr, -                                                               std::size_t size) const { +    std::vector<TSurface> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {          if (size == 0) {              return {};          }          const IntervalType interval{cache_addr, cache_addr + size}; -        std::vector<std::shared_ptr<TSurface>> surfaces; +        std::vector<TSurface> surfaces;          for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { -            surfaces.push_back(*pair.second.begin()); +            for (auto& s : pair.second) { +                if (!s || !s->IsRegistered()) { +                    continue; +                } +                surfaces.push_back(s); +            }          }          return surfaces;      } -    void ReserveSurface(const SurfaceParams& params, std::shared_ptr<TSurface> surface) { +    void RegisterInnerCache(TSurface& surface) { +        GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; +        const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; +        while (start <= end) { +            inner_cache[start].push_back(surface); +            start++; +        } +    } + +    void UnregisterInnerCache(TSurface& surface) { +        GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; +        const GPUVAddr end = (surface->GetGpuAddrEnd() - 1)  >> inner_cache_page_bits; +        while (start <= end) { +            inner_cache[start].remove(surface); +            start++; +        } +    } + +    std::vector<TSurface> GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { +        if (size == 0) { +            return {}; +        } +        const GPUVAddr gpu_addr_end = gpu_addr + size; +        GPUVAddr start = gpu_addr >> inner_cache_page_bits; +        const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; +        std::vector<TSurface> surfaces; +        while (start <= end) { +            std::list<TSurface>& list = inner_cache[start]; +            for (auto& s : list) { +                if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { +                    s->MarkAsPicked(true); +                    surfaces.push_back(s); +                } +            } +            start++; +        } +        for (auto& s : surfaces) { +            s->MarkAsPicked(false); +        } +        return surfaces; +    } + +    void ReserveSurface(const SurfaceParams& params, TSurface surface) {          surface_reserve[params].push_back(std::move(surface));      } -    std::shared_ptr<TSurface> TryGetReservedSurface(const SurfaceParams& params) { +    TSurface TryGetReservedSurface(const SurfaceParams& params) {          auto search{surface_reserve.find(params)};          if (search == surface_reserve.end()) {              return {}; @@ -247,21 +505,41 @@ private:          return {};      } -    IntervalType GetSurfaceInterval(std::shared_ptr<TSurface> surface) const { -        return IntervalType::right_open(surface->GetCacheAddr(), -                                        surface->GetCacheAddr() + surface->GetSizeInBytes()); +    IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { +        const CacheAddr addr = ToCacheAddr(host_ptr); +        return IntervalType::right_open(addr, addr + size);      } +    struct RenderInfo { +        RenderTargetConfig config; +        TSurface target; +        TView view; +    }; + +    struct DepthBufferInfo { +        TSurface target; +        TView view; +    }; +      VideoCore::RasterizerInterface& rasterizer; +    Tegra::MemoryManager* memory_manager;      u64 ticks{};      IntervalMap registered_surfaces; +    static constexpr u64 inner_cache_page_bits{20}; +    static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; +    std::unordered_map<GPUVAddr, std::list<TSurface>> inner_cache; +      /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have      /// previously been used. This is to prevent surfaces from being constantly created and      /// destroyed when used with different surface parameters. -    std::unordered_map<SurfaceParams, std::list<std::shared_ptr<TSurface>>> surface_reserve; +    std::unordered_map<SurfaceParams, std::list<TSurface>> surface_reserve; +    std::array<RenderInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> render_targets; +    DepthBufferInfo depth_buffer; + +    std::vector<u8> staging_buffer;  };  } // namespace VideoCommon | 
