diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 135 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 40 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 6 | 
3 files changed, 109 insertions, 72 deletions
| diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9c8925383..591ec7998 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -78,6 +78,29 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {      }  } +std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const { +    const u32 compression_factor{GetCompressionFactor(pixel_format)}; +    const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; +    u32 m_depth = (layer_only ? 1U : depth); +    u32 m_width = std::max(1U, width / compression_factor); +    u32 m_height = std::max(1U, height / compression_factor); +    std::size_t size = Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height, +                                                     m_depth, block_height, block_depth); +    u32 m_block_height = block_height; +    u32 m_block_depth = block_depth; +    std::size_t block_size_bytes = 512 * block_height * block_depth; // 512 is GOB size +    for (u32 i = 1; i < max_mip_level; i++) { +        m_width = std::max(1U, m_width / 2); +        m_height = std::max(1U, m_height / 2); +        m_depth = std::max(1U, m_depth / 2); +        m_block_height = std::max(1U, m_block_height / 2); +        m_block_depth = std::max(1U, m_block_depth / 2); +        size += Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height, m_depth, +                                              m_block_height, m_block_depth); +    } +    return is_tiled ? Common::AlignUp(size, block_size_bytes) : size; +} +  /*static*/ SurfaceParams SurfaceParams::CreateForTexture(      const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {      SurfaceParams params{}; @@ -124,6 +147,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {          break;      } +    params.is_layered = SurfaceTargetIsLayered(params.target);      params.max_mip_level = config.tic.max_mip_level + 1;      params.rt = {}; @@ -150,6 +174,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {      params.target = SurfaceTarget::Texture2D;      params.depth = 1;      params.max_mip_level = 0; +    params.is_layered = false;      // Render target specific parameters, not used for caching      params.rt.index = static_cast<u32>(index); @@ -182,6 +207,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {      params.target = SurfaceTarget::Texture2D;      params.depth = 1;      params.max_mip_level = 0; +    params.is_layered = false;      params.rt = {};      params.InitCacheParameters(zeta_address); @@ -361,10 +387,11 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d      }  } -static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), -                            SurfaceParams::MaxPixelFormat> -    morton_to_gl_fns = { -        // clang-format off +using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), +                                     SurfaceParams::MaxPixelFormat>; + +static constexpr GLConversionArray morton_to_gl_fns = { +    // clang-format off          MortonCopy<true, PixelFormat::ABGR8U>,          MortonCopy<true, PixelFormat::ABGR8S>,          MortonCopy<true, PixelFormat::ABGR8UI>, @@ -418,13 +445,11 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,          MortonCopy<true, PixelFormat::Z24S8>,          MortonCopy<true, PixelFormat::S8Z24>,          MortonCopy<true, PixelFormat::Z32FS8>, -        // clang-format on +    // clang-format on  }; -static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), -                            SurfaceParams::MaxPixelFormat> -    gl_to_morton_fns = { -        // clang-format off +static constexpr GLConversionArray gl_to_morton_fns = { +    // clang-format off          MortonCopy<false, PixelFormat::ABGR8U>,          MortonCopy<false, PixelFormat::ABGR8S>,          MortonCopy<false, PixelFormat::ABGR8UI>, @@ -479,9 +504,35 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,          MortonCopy<false, PixelFormat::Z24S8>,          MortonCopy<false, PixelFormat::S8Z24>,          MortonCopy<false, PixelFormat::Z32FS8>, -        // clang-format on +    // clang-format on  }; +void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params, +                 std::vector<u8>& gl_buffer) { +    u32 depth = params.depth; +    if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { +        // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. +        depth = 1U; +    } +    if (params.is_layered) { +        u64 offset = 0; +        u64 offset_gl = 0; +        u64 layer_size = params.LayerMemorySize(); +        u64 gl_size = params.LayerSizeGL(); +        for (u32 i = 0; i < depth; i++) { +            functions[static_cast<std::size_t>(params.pixel_format)]( +                params.width, params.block_height, params.height, params.block_depth, 1, +                gl_buffer.data() + offset_gl, gl_size, params.addr + offset); +            offset += layer_size; +            offset_gl += gl_size; +        } +    } else { +        functions[static_cast<std::size_t>(params.pixel_format)]( +            params.width, params.block_height, params.height, params.block_depth, depth, +            gl_buffer.data(), gl_buffer.size(), params.addr); +    } +} +  static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,                          GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,                          GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -881,21 +932,10 @@ void CachedSurface::LoadGLBuffer() {      gl_buffer.resize(params.size_in_bytes_gl);      if (params.is_tiled) { -        u32 depth = params.depth; -        u32 block_depth = params.block_depth; -          ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",                     params.block_width, static_cast<u32>(params.target)); -        if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { -            // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. -            depth = 1U; -            block_depth = 1U; -        } - -        morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( -            params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), -            gl_buffer.size(), params.addr); +        SwizzleFunc(morton_to_gl_fns, params, gl_buffer);      } else {          const auto texture_src_data{Memory::GetPointer(params.addr)};          const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; @@ -929,19 +969,10 @@ void CachedSurface::FlushGLBuffer() {      const u8* const texture_src_data = Memory::GetPointer(params.addr);      ASSERT(texture_src_data);      if (params.is_tiled) { -        u32 depth = params.depth; -        u32 block_depth = params.block_depth; -          ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",                     params.block_width, static_cast<u32>(params.target)); -        if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { -            // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. -            depth = 1U; -        } -        gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( -            params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), -            gl_buffer.size(), GetAddr()); +        SwizzleFunc(gl_to_morton_fns, params, gl_buffer);      } else {          std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());      } @@ -1179,7 +1210,7 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,                                                  const Surface& dst_surface) {      const auto& src_params{src_surface->GetSurfaceParams()};      const auto& dst_params{dst_surface->GetSurfaceParams()}; -    FlushRegion(src_params.addr, dst_params.size_in_bytes); +    FlushRegion(src_params.addr, dst_params.MemorySize());      LoadSurface(dst_surface);  } @@ -1221,44 +1252,10 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,              CopySurface(old_surface, new_surface, copy_pbo.handle);          }          break; +    case SurfaceParams::SurfaceTarget::TextureCubemap:      case SurfaceParams::SurfaceTarget::Texture3D:          AccurateCopySurface(old_surface, new_surface);          break; -    case SurfaceParams::SurfaceTarget::TextureCubemap: { -        if (old_params.rt.array_mode != 1) { -            // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this -            // yet (array rendering used as a cubemap texture). -            LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode); -            UNREACHABLE(); -            return new_surface; -        } - -        // This seems to be used for render-to-cubemap texture -        ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected"); -        ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected"); -        ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented"); - -        // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels. -        // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild. -        const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)}; - -        for (std::size_t index = 0; index < new_params.depth; ++index) { -            Surface face_surface{TryGetReservedSurface(old_params)}; -            ASSERT_MSG(face_surface, "Unexpected"); - -            if (is_blit) { -                BlitSurface(face_surface, new_surface, read_framebuffer.handle, -                            draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index, -                            new_params.rt.index, index); -            } else { -                CopySurface(face_surface, new_surface, copy_pbo.handle, -                            face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index); -            } - -            old_params.addr += byte_stride; -        } -        break; -    }      default:          LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",                       static_cast<u32>(new_params.target)); @@ -1266,7 +1263,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,      }      return new_surface; -} +} // namespace OpenGL  Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {      return TryGet(addr); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0dd0d90a3..50a7ab47d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -168,6 +168,23 @@ struct SurfaceParams {          }      } +    static bool SurfaceTargetIsLayered(SurfaceTarget target) { +        switch (target) { +        case SurfaceTarget::Texture1D: +        case SurfaceTarget::Texture2D: +        case SurfaceTarget::Texture3D: +            return false; +        case SurfaceTarget::Texture1DArray: +        case SurfaceTarget::Texture2DArray: +        case SurfaceTarget::TextureCubemap: +            return true; +        default: +            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); +            UNREACHABLE(); +            return false; +        } +    } +      /**       * Gets the compression factor for the specified PixelFormat. This applies to just the       * "compressed width" and "compressed height", not the overall compression factor of a @@ -742,6 +759,25 @@ struct SurfaceParams {          return size_in_bytes_gl / 6;      } +    /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. +    std::size_t MemorySize() const { +        std::size_t size = InnerMemorySize(is_layered); +        if (is_layered) +            return size * depth; +        return size; +    } + +    /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including +    /// mipmaps. +    std::size_t LayerMemorySize() const { +        return InnerMemorySize(true); +    } + +    /// Returns the size of a layer of this surface in OpenGL. +    std::size_t LayerSizeGL() const { +        return SizeInBytesRaw(true) / depth; +    } +      /// Creates SurfaceParams from a texture configuration      static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,                                            const GLShader::SamplerEntry& entry); @@ -782,6 +818,7 @@ struct SurfaceParams {      u32 unaligned_height;      SurfaceTarget target;      u32 max_mip_level; +    bool is_layered;      // Parameters used for caching      VAddr addr; @@ -797,6 +834,9 @@ struct SurfaceParams {          u32 layer_stride;          u32 base_layer;      } rt; + +private: +    std::size_t InnerMemorySize(bool layer_only = false) const;  };  }; // namespace OpenGL diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index da7989db9..550ca856c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -319,13 +319,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat  std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,                            u32 block_height, u32 block_depth) {      if (tiled) { -        const u32 gobs_in_x = 64 / bytes_per_pixel; +        const u32 gobs_in_x = 64;          const u32 gobs_in_y = 8;          const u32 gobs_in_z = 1; -        const u32 aligned_width = Common::AlignUp(width, gobs_in_x); +        const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x);          const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height);          const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth); -        return aligned_width * aligned_height * aligned_depth * bytes_per_pixel; +        return aligned_width * aligned_height * aligned_depth;      } else {          return width * height * depth * bytes_per_pixel;      } | 
