diff options
| author | bunnei <bunneidev@gmail.com> | 2018-06-29 14:07:28 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-06-29 14:07:28 -0400 | 
| commit | 50ef2beb5850362dda9c1d50531475f0c5b6e8df (patch) | |
| tree | ea63b3666a58af16fe7985c28fccc7890c45f951 | |
| parent | da2bdbc0d7772fe195611fb163df2dbd533a6b56 (diff) | |
| parent | c18425ef989fd0c7f9bc1bdf4ba6b5e9235a8193 (diff) | |
Merge pull request #595 from bunnei/raster-cache
Rewrite the OpenGL rasterizer cache
| -rw-r--r-- | src/core/settings.h | 1 | ||||
| -rw-r--r-- | src/core/telemetry_session.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 36 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 107 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1374 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 331 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 2 | ||||
| -rw-r--r-- | src/yuzu/configuration/config.cpp | 3 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics.cpp | 2 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics.ui | 7 | ||||
| -rw-r--r-- | src/yuzu_cmd/config.cpp | 2 | ||||
| -rw-r--r-- | src/yuzu_cmd/default_ini.h | 4 | 
15 files changed, 425 insertions, 1454 deletions
diff --git a/src/core/settings.h b/src/core/settings.h index a7f1e5fa0..7150d9755 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -129,6 +129,7 @@ struct Values {      // Renderer      float resolution_factor;      bool toggle_framelimit; +    bool use_accurate_framebuffers;      float bg_red;      float bg_green; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index a60aa1143..270d68222 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -161,6 +161,8 @@ TelemetrySession::TelemetrySession() {               Settings::values.resolution_factor);      AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",               Settings::values.toggle_framelimit); +    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers", +             Settings::values.use_accurate_framebuffers);      AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",               Settings::values.use_docked_mode);  } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 2dc251205..180be4ff4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -321,6 +321,24 @@ public:              INSERT_PADDING_WORDS(1);          }; +        struct RenderTargetConfig { +            u32 address_high; +            u32 address_low; +            u32 width; +            u32 height; +            Tegra::RenderTargetFormat format; +            u32 block_dimensions; +            u32 array_mode; +            u32 layer_stride; +            u32 base_layer; +            INSERT_PADDING_WORDS(7); + +            GPUVAddr Address() const { +                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | +                                             address_low); +            } +        }; +          union {              struct {                  INSERT_PADDING_WORDS(0x45); @@ -333,23 +351,7 @@ public:                  INSERT_PADDING_WORDS(0x1B8); -                struct { -                    u32 address_high; -                    u32 address_low; -                    u32 width; -                    u32 height; -                    Tegra::RenderTargetFormat format; -                    u32 block_dimensions; -                    u32 array_mode; -                    u32 layer_stride; -                    u32 base_layer; -                    INSERT_PADDING_WORDS(7); - -                    GPUVAddr Address() const { -                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | -                                                     address_low); -                    } -                } rt[NumRenderTargets]; +                RenderTargetConfig rt[NumRenderTargets];                  struct {                      f32 scale_x; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f0e48a802..145e58334 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -51,9 +51,8 @@ public:      }      /// Attempt to use a faster method to display the framebuffer to screen -    virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, -                                   VAddr framebuffer_addr, u32 pixel_stride, -                                   ScreenInfo& screen_info) { +    virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, +                                   u32 pixel_stride, ScreenInfo& screen_info) {          return false;      } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3fbf8e1f9..62ee45a36 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,          u64 size = end - start + 1;          // Copy vertex array data -        res_cache.FlushRegion(start, size, nullptr);          Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);          // Bind the vertex array to the buffer at the current offset. @@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() {      std::tie(color_surface, depth_surface, surfaces_rect) =          res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); -    const u16 res_scale = color_surface != nullptr -                              ? color_surface->res_scale -                              : (depth_surface == nullptr ? 1u : depth_surface->res_scale); -      MathUtil::Rectangle<u32> draw_rect{ +        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, +                                         surfaces_rect.left, surfaces_rect.right)), // Left +        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, +                                         surfaces_rect.bottom, surfaces_rect.top)), // Top +        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right, +                                         surfaces_rect.left, surfaces_rect.right)), // Right          static_cast<u32>( -            std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, -                            surfaces_rect.left, surfaces_rect.right)), // Left -        static_cast<u32>( -            std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, -                            surfaces_rect.bottom, surfaces_rect.top)), // Top -        static_cast<u32>( -            std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, -                            surfaces_rect.left, surfaces_rect.right)), // Right -        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + -                                             viewport_rect.bottom * res_scale, -                                         surfaces_rect.bottom, surfaces_rect.top))}; // Bottom +            std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom, +                            surfaces_rect.bottom, surfaces_rect.top))}; // Bottom      // Bind the framebuffer surfaces      BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);      // Sync the viewport -    SyncViewport(surfaces_rect, res_scale); +    SyncViewport(surfaces_rect);      // Sync the blend state registers      SyncBlendState(); @@ -442,19 +434,11 @@ void RasterizerOpenGL::DrawArrays() {      state.Apply();      // Mark framebuffer surfaces as dirty -    MathUtil::Rectangle<u32> draw_rect_unscaled{ -        draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, -        draw_rect.bottom / res_scale}; -      if (color_surface != nullptr && write_color_fb) { -        auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); -        res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), -                                   color_surface); +        res_cache.MarkSurfaceAsDirty(color_surface);      }      if (depth_surface != nullptr && write_depth_fb) { -        auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); -        res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), -                                   depth_surface); +        res_cache.MarkSurfaceAsDirty(depth_surface);      }  } @@ -462,7 +446,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}  void RasterizerOpenGL::FlushAll() {      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    res_cache.FlushAll(); +    res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);  }  void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { @@ -472,13 +456,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {  void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    res_cache.InvalidateRegion(addr, size, nullptr); +    res_cache.InvalidateRegion(addr, size);  }  void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {      MICROPROFILE_SCOPE(OpenGL_CacheManagement);      res_cache.FlushRegion(addr, size); -    res_cache.InvalidateRegion(addr, size, nullptr); +    res_cache.InvalidateRegion(addr, size);  }  bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { @@ -497,45 +481,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {      return true;  } -bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, +bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,                                           VAddr framebuffer_addr, u32 pixel_stride,                                           ScreenInfo& screen_info) { -    if (framebuffer_addr == 0) { -        return false; +    if (!framebuffer_addr) { +        return {};      } +      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    SurfaceParams src_params; -    src_params.cpu_addr = framebuffer_addr; -    src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); -    src_params.width = std::min(framebuffer.width, pixel_stride); -    src_params.height = framebuffer.height; -    src_params.stride = pixel_stride; -    src_params.is_tiled = true; -    src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; -    src_params.pixel_format = -        SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); -    src_params.component_type = -        SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); -    src_params.UpdateParams(); - -    MathUtil::Rectangle<u32> src_rect; -    Surface src_surface; -    std::tie(src_surface, src_rect) = -        res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - -    if (src_surface == nullptr) { -        return false; +    const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; +    if (!surface) { +        return {};      } -    u32 scaled_width = src_surface->GetScaledWidth(); -    u32 scaled_height = src_surface->GetScaledHeight(); - -    screen_info.display_texcoords = MathUtil::Rectangle<float>( -        (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, -        (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); +    // Verify that the cached surface is the same size and format as the requested framebuffer +    const auto& params{surface->GetSurfaceParams()}; +    const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)}; +    ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); +    ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); +    ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); -    screen_info.display_texture = src_surface->texture.handle; +    screen_info.display_texture = surface->Texture().handle;      return true;  } @@ -674,7 +641,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,          texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);          Surface surface = res_cache.GetTextureSurface(texture);          if (surface != nullptr) { -            state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; +            state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;              state.texture_units[current_bindpoint].swizzle.r =                  MaxwellToGL::SwizzleSource(texture.tic.x_source);              state.texture_units[current_bindpoint].swizzle.g = @@ -700,16 +667,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,      state.Apply();      glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, -                           color_surface != nullptr ? color_surface->texture.handle : 0, 0); +                           color_surface != nullptr ? color_surface->Texture().handle : 0, 0);      if (depth_surface != nullptr) {          if (has_stencil) {              // attach both depth and stencil              glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, -                                   depth_surface->texture.handle, 0); +                                   depth_surface->Texture().handle, 0);          } else {              // attach depth              glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, -                                   depth_surface->texture.handle, 0); +                                   depth_surface->Texture().handle, 0);              // clear stencil attachment              glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);          } @@ -720,14 +687,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,      }  } -void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {      const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;      const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; -    state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; -    state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; -    state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); -    state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); +    state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left; +    state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom; +    state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); +    state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());  }  void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4762983c9..621200f03 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -109,7 +109,7 @@ private:                        u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);      /// Syncs the viewport to match the guest state -    void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); +    void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);      /// Syncs the clip enabled status to match the guest state      void SyncClipEnabled(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 857164ff6..63f5999ea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1,37 +1,23 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included.  #include <algorithm> -#include <atomic> -#include <cstring> -#include <iterator> -#include <memory> -#include <utility> -#include <vector> -#include <boost/optional.hpp> -#include <boost/range/iterator_range.hpp>  #include <glad/glad.h> +  #include "common/alignment.h" -#include "common/bit_field.h" -#include "common/color.h" -#include "common/logging/log.h" -#include "common/math_util.h" +#include "common/assert.h"  #include "common/microprofile.h"  #include "common/scope_exit.h"  #include "core/core.h" -#include "core/frontend/emu_window.h"  #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h"  #include "core/memory.h"  #include "core/settings.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h"  #include "video_core/textures/astc.h"  #include "video_core/textures/decoders.h"  #include "video_core/utils.h" -#include "video_core/video_core.h"  using SurfaceType = SurfaceParams::SurfaceType;  using PixelFormat = SurfaceParams::PixelFormat; @@ -44,6 +30,40 @@ struct FormatTuple {      bool compressed;  }; +/*static*/ SurfaceParams SurfaceParams::CreateForTexture( +    const Tegra::Texture::FullTextureInfo& config) { + +    SurfaceParams params{}; +    params.addr = config.tic.Address(); +    params.is_tiled = config.tic.IsTiled(); +    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, +    params.pixel_format = PixelFormatFromTextureFormat(config.tic.format); +    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); +    params.type = GetFormatType(params.pixel_format); +    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); +    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); +    params.unaligned_height = config.tic.Height(); +    params.size_in_bytes = params.SizeInBytes(); +    return params; +} + +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( +    const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { + +    SurfaceParams params{}; +    params.addr = config.Address(); +    params.is_tiled = true; +    params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; +    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); +    params.component_type = ComponentTypeFromRenderTarget(config.format); +    params.type = GetFormatType(params.pixel_format); +    params.width = config.width; +    params.height = config.height; +    params.unaligned_height = config.height; +    params.size_in_bytes = params.SizeInBytes(); +    return params; +} +  static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                    // ABGR8      {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                       // B5G6R5 @@ -63,8 +83,8 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType      const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);      if (type == SurfaceType::ColorTexture) {          ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); -        // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are -        // type FLOAT +        // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which +        // are type FLOAT          ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||                 pixel_format == PixelFormat::R11FG11FB10F);          return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; @@ -77,65 +97,70 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType      return {};  } -template <typename Map, typename Interval> -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { -    return boost::make_iterator_range(map.equal_range(interval)); +VAddr SurfaceParams::GetCpuAddr() const { +    const auto& gpu = Core::System::GetInstance().GPU(); +    return *gpu.memory_manager->GpuToCpuAddress(addr);  } -static u16 GetResolutionScaleFactor() { -    return static_cast<u16>(!Settings::values.resolution_factor -                                ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() -                                : Settings::values.resolution_factor); +static bool IsPixelFormatASTC(PixelFormat format) { +    switch (format) { +    case PixelFormat::ASTC_2D_4X4: +        return true; +    default: +        return false; +    }  } -static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { -    u32 block_width{}; -    u32 block_height{}; - +static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {      switch (format) {      case PixelFormat::ASTC_2D_4X4: -        block_width = 4; -        block_height = 4; -        break; +        return {4, 4};      default:          NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));          UNREACHABLE();      } +} + +MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { +    u32 actual_height{unaligned_height}; +    if (IsPixelFormatASTC(pixel_format)) { +        // ASTC formats must stop at the ATSC block size boundary +        actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); +    } +    return {0, actual_height, width, 0}; +} +static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { +    u32 block_width{}; +    u32 block_height{}; +    std::tie(block_width, block_height) = GetASTCBlockSize(format);      data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);  }  template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, -                Tegra::GPUVAddr start, Tegra::GPUVAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {      constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;      constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);      const auto& gpu = Core::System::GetInstance().GPU();      if (morton_to_gl) {          auto data = Tegra::Texture::UnswizzleTexture( -            *gpu.memory_manager->GpuToCpuAddress(base), +            *gpu.memory_manager->GpuToCpuAddress(addr),              SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); -        if (SurfaceParams::IsFormatASTC(format)) { -            // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this -            ConvertASTCToRGBA8(data, format, stride, height); -        } -          std::memcpy(gl_buffer, data.data(), data.size());      } else { -        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check -        // the configuration for this and perform more generic un/swizzle +        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should +        // check the configuration for this and perform more generic un/swizzle          NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");          VideoCore::MortonCopyPixels128(              stride, height, bytes_per_pixel, gl_bytes_per_pixel, -            Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, +            Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer,              morton_to_gl);      }  } -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, -                                     Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),                              SurfaceParams::MaxPixelFormat>      morton_to_gl_fns = {          MortonCopy<true, PixelFormat::ABGR8>,        MortonCopy<true, PixelFormat::B5G6R5>, @@ -146,8 +171,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:          MortonCopy<true, PixelFormat::DXN1>,         MortonCopy<true, PixelFormat::ASTC_2D_4X4>,  }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, -                                     Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),                              SurfaceParams::MaxPixelFormat>      gl_to_morton_fns = {          MortonCopy<false, PixelFormat::ABGR8>, @@ -192,374 +216,79 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup      cur_state.Apply();  } -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, -                         const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, -                         GLuint read_fb_handle, GLuint draw_fb_handle) { - -    glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, -                       GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), -                       src_rect.GetHeight(), 0); -    return true; -} - -static bool FillSurface(const Surface& surface, const u8* fill_data, -                        const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { -    UNREACHABLE(); -    return {}; -} - -SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { -    SurfaceParams params = *this; -    const u32 tiled_size = is_tiled ? 8 : 1; -    const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); -    Tegra::GPUVAddr aligned_start = -        addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); -    Tegra::GPUVAddr aligned_end = -        addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); - -    if (aligned_end - aligned_start > stride_tiled_bytes) { -        params.addr = aligned_start; -        params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); -    } else { -        // 1 row -        ASSERT(aligned_end - aligned_start == stride_tiled_bytes); -        const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); -        aligned_start = -            addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); -        aligned_end = -            addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); -        params.addr = aligned_start; -        params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); -        params.stride = params.width; -        params.height = tiled_size; -    } -    params.UpdateParams(); - -    return params; -} - -SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { -    if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { -        return {}; -    } - -    if (is_tiled) { -        unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; -        unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; -        unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; -        unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; -    } - -    const u32 stride_tiled = !is_tiled ? stride : stride * 8; - -    const u32 pixel_offset = -        stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + -        unscaled_rect.left; - -    const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); - -    return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; -} - -MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { -    const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); - -    if (is_tiled) { -        const int x0 = (begin_pixel_index % (stride * 8)) / 8; -        const int y0 = (begin_pixel_index / (stride * 8)) * 8; -        // Top to bottom -        return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, -                                        height - (y0 + sub_surface.height)); -    } - -    const int x0 = begin_pixel_index % stride; -    const int y0 = begin_pixel_index / stride; -    // Bottom to top -    return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); -} - -MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { -    auto rect = GetSubRect(sub_surface); -    rect.left = rect.left * res_scale; -    rect.right = rect.right * res_scale; -    rect.top = rect.top * res_scale; -    rect.bottom = rect.bottom * res_scale; -    return rect; -} - -bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { -    return std::tie(other_surface.addr, other_surface.width, other_surface.height, -                    other_surface.stride, other_surface.block_height, other_surface.pixel_format, -                    other_surface.component_type, -                    other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, -                                                        pixel_format, component_type, is_tiled) && -           pixel_format != PixelFormat::Invalid; -} - -bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { -    return sub_surface.addr >= addr && sub_surface.end <= end && -           sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && -           sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && -           sub_surface.component_type == component_type && -           (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && -           (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && -           GetSubRect(sub_surface).left + sub_surface.width <= stride; -} - -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { -    return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && -           addr <= expanded_surface.end && expanded_surface.addr <= end && -           is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && -           component_type == expanded_surface.component_type && stride == expanded_surface.stride && -           (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % -                   BytesInPixels(stride * (is_tiled ? 8 : 1)) == -               0; -} - -bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { -    if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || -        end < texcopy_params.end) { -        return false; -    } -    if (texcopy_params.block_height != block_height || -        texcopy_params.component_type != component_type) -        return false; - -    if (texcopy_params.width != texcopy_params.stride) { -        const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); -        return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && -               texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && -               (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && -               ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; -    } -    return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); -} - -VAddr SurfaceParams::GetCpuAddr() const { -    // When this function is used, only cpu_addr or (GPU) addr should be set, not both -    ASSERT(!(cpu_addr && addr)); -    const auto& gpu = Core::System::GetInstance().GPU(); -    return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, -                            SurfaceInterval fill_interval) const { -    if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && -        boost::icl::first(fill_interval) >= addr && -        boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range -        dest_surface.FromInterval(fill_interval).GetInterval() == -            fill_interval) { // make sure interval is a rectangle in dest surface -        if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { -            // Check if bits repeat for our fill_size -            const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); -            std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); - -            for (u32 i = 0; i < dest_bytes_per_pixel; ++i) -                std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - -            for (u32 i = 0; i < fill_size; ++i) -                if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], -                                dest_bytes_per_pixel) != 0) -                    return false; - -            if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) -                return false; -        } -        return true; -    } -    return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, -                            SurfaceInterval copy_interval) const { -    SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); -    ASSERT(subrect_params.GetInterval() == copy_interval); -    if (CanSubRect(subrect_params)) -        return true; - -    if (CanFill(dest_surface, copy_interval)) -        return true; - -    return false; -} - -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { -    SurfaceInterval result{}; -    const auto valid_regions = -        SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; -    for (auto& valid_interval : valid_regions) { -        const SurfaceInterval aligned_interval{ -            addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, -                                   BytesInPixels(is_tiled ? 8 * 8 : 1)), -            addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, -                                     BytesInPixels(is_tiled ? 8 * 8 : 1))}; - -        if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || -            boost::icl::length(aligned_interval) == 0) { -            continue; -        } - -        // Get the rectangle within aligned_interval -        const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1); -        SurfaceInterval rect_interval{ -            addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), -            addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), -        }; -        if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { -            // 1 row -            rect_interval = aligned_interval; -        } else if (boost::icl::length(rect_interval) == 0) { -            // 2 rows that do not make a rectangle, return the larger one -            const SurfaceInterval row1{boost::icl::first(aligned_interval), -                                       boost::icl::first(rect_interval)}; -            const SurfaceInterval row2{boost::icl::first(rect_interval), -                                       boost::icl::last_next(aligned_interval)}; -            rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; -        } - -        if (boost::icl::length(rect_interval) > boost::icl::length(result)) { -            result = rect_interval; -        } -    } -    return result; -} - -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, -                                        SurfaceInterval copy_interval) { -    SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); -    ASSERT(subrect_params.GetInterval() == copy_interval); - -    ASSERT(src_surface != dst_surface); - -    // This is only called when CanCopy is true, no need to run checks here -    if (src_surface->type == SurfaceType::Fill) { -        // FillSurface needs a 4 bytes buffer -        const u64 fill_offset = -            (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; -        std::array<u8, 4> fill_buffer; - -        u64 fill_buff_pos = fill_offset; -        for (int i : {0, 1, 2, 3}) -            fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - -        FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), -                    draw_framebuffer.handle); -        return; -    } -    if (src_surface->CanSubRect(subrect_params)) { -        BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), -                     dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), -                     src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); -        return; -    } -    UNREACHABLE(); +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { +    texture.Create(); +    const auto& rect{params.GetRect()}; +    AllocateSurfaceTexture(texture.handle, +                           GetFormatTuple(params.pixel_format, params.component_type), +                           rect.GetWidth(), rect.GetHeight());  }  MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { -    ASSERT(type != SurfaceType::Fill); +void CachedSurface::LoadGLBuffer() { +    ASSERT(params.type != SurfaceType::Fill); -    u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); -    if (texture_src_data == nullptr) -        return; +    u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); -    if (gl_buffer == nullptr) { -        gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); -        gl_buffer.reset(new u8[gl_buffer_size]); -    } +    ASSERT(texture_src_data); -    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); +    gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); -    ASSERT(load_start >= addr && load_end <= end); -    const u64 start_offset = load_start - addr; +    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); -    if (!is_tiled) { -        const u32 bytes_per_pixel{GetFormatBpp() >> 3}; +    if (!params.is_tiled) { +        const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; -        std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, -                    bytes_per_pixel * width * height); +        std::memcpy(gl_buffer.data(), texture_src_data, +                    bytes_per_pixel * params.width * params.height);      } else { -        morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, -                                                            GetActualHeight(), &gl_buffer[0], addr, -                                                            load_start, load_end); +        morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( +            params.width, params.block_height, params.height, gl_buffer.data(), params.addr); +    } + +    if (IsPixelFormatASTC(params.pixel_format)) { +        // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this +        ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height);      }  }  MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { -    u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); -    if (dst_buffer == nullptr) -        return; - -    ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); +void CachedSurface::FlushGLBuffer() { +    u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); -    // TODO: Should probably be done in ::Memory:: and check for other regions too -    // same as loadglbuffer() -    if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) -        flush_end = Memory::VRAM_VADDR_END; - -    if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) -        flush_start = Memory::VRAM_VADDR; +    ASSERT(dst_buffer); +    ASSERT(gl_buffer.size() == +           params.width * params.height * GetGLBytesPerPixel(params.pixel_format));      MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); -    ASSERT(flush_start >= addr && flush_end <= end); -    const u64 start_offset = flush_start - addr; -    const u64 end_offset = flush_end - addr; - -    if (type == SurfaceType::Fill) { -        const u64 coarse_start_offset = start_offset - (start_offset % fill_size); -        const u64 backup_bytes = start_offset % fill_size; -        std::array<u8, 4> backup_data; -        if (backup_bytes) -            std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - -        for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { -            std::memcpy(&dst_buffer[offset], &fill_data[0], -                        std::min(fill_size, end_offset - offset)); -        } - -        if (backup_bytes) -            std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); -    } else if (!is_tiled) { -        std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); +    if (!params.is_tiled) { +        std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);      } else { -        gl_to_morton_fns[static_cast<size_t>(pixel_format)]( -            stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); +        gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( +            params.width, params.block_height, params.height, gl_buffer.data(), params.addr);      }  }  MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); -void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, -                                    GLuint draw_fb_handle) { -    if (type == SurfaceType::Fill) +void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { +    if (params.type == SurfaceType::Fill)          return;      MICROPROFILE_SCOPE(OpenGL_TextureUL); -    ASSERT(gl_buffer_size == -           GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); +    ASSERT(gl_buffer.size() == +           params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + +    const auto& rect{params.GetRect()};      // Load data from memory to the surface      GLint x0 = static_cast<GLint>(rect.left);      GLint y0 = static_cast<GLint>(rect.bottom); -    size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); +    size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); -    const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); +    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);      GLuint target_tex = texture.handle; - -    // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in -    // surface -    OGLTexture unscaled_tex; -    if (res_scale != 1) { -        x0 = 0; -        y0 = 0; - -        unscaled_tex.Create(); -        AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); -        target_tex = unscaled_tex.handle; -    } -      OpenGLState cur_state = OpenGLState::GetCurState();      GLuint old_tex = cur_state.texture_units[0].texture_2d; @@ -567,15 +296,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint      cur_state.Apply();      // Ensure no bad interactions with GL_UNPACK_ALIGNMENT -    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); -    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); +    ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); +    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));      glActiveTexture(GL_TEXTURE0);      if (tuple.compressed) { -        glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, -                               static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), -                               static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, -                               static_cast<GLsizei>(size), &gl_buffer[buffer_offset]); +        glCompressedTexImage2D( +            GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), +            static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes), +            &gl_buffer[buffer_offset]);      } else {          glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),                          static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, @@ -586,827 +315,238 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint      cur_state.texture_units[0].texture_2d = old_tex;      cur_state.Apply(); - -    if (res_scale != 1) { -        auto scaled_rect = rect; -        scaled_rect.left *= res_scale; -        scaled_rect.top *= res_scale; -        scaled_rect.right *= res_scale; -        scaled_rect.bottom *= res_scale; - -        BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, -                     scaled_rect, type, read_fb_handle, draw_fb_handle); -    }  }  MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, -                                      GLuint draw_fb_handle) { -    if (type == SurfaceType::Fill) +void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { +    if (params.type == SurfaceType::Fill)          return;      MICROPROFILE_SCOPE(OpenGL_TextureDL); -    if (gl_buffer == nullptr) { -        gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); -        gl_buffer.reset(new u8[gl_buffer_size]); -    } +    gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));      OpenGLState state = OpenGLState::GetCurState();      OpenGLState prev_state = state;      SCOPE_EXIT({ prev_state.Apply(); }); -    const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); +    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);      // Ensure no bad interactions with GL_PACK_ALIGNMENT -    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); -    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); -    size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); - -    // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush -    if (res_scale != 1) { -        auto scaled_rect = rect; -        scaled_rect.left *= res_scale; -        scaled_rect.top *= res_scale; -        scaled_rect.right *= res_scale; -        scaled_rect.bottom *= res_scale; - -        OGLTexture unscaled_tex; -        unscaled_tex.Create(); - -        MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; -        AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); -        BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, -                     read_fb_handle, draw_fb_handle); - -        state.texture_units[0].texture_2d = unscaled_tex.handle; -        state.Apply(); - -        glActiveTexture(GL_TEXTURE0); -        glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); -    } else { -        state.UnbindTexture(texture.handle); -        state.draw.read_framebuffer = read_fb_handle; -        state.Apply(); - -        if (type == SurfaceType::ColorTexture) { -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, -                                   texture.handle, 0); -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, -                                   0, 0); -        } else if (type == SurfaceType::Depth) { -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, -                                   texture.handle, 0); -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); -        } else { -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, -                                   texture.handle, 0); -        } -        glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), -                     static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), -                     tuple.format, tuple.type, &gl_buffer[buffer_offset]); -    } +    ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); +    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); -    glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} - -enum class MatchFlags { -    None = 0, -    Invalid = 1,      // Flag that can be applied to other match types, invalid matches require -                      // validation before they can be used -    Exact = 1 << 1,   // Surfaces perfectly match -    SubRect = 1 << 2, // Surface encompasses params -    Copy = 1 << 3,    // Surface we can copy from -    Expand = 1 << 4,  // Surface that can expand params -    TexCopy = 1 << 5  // Surface that will match a display transfer "texture copy" parameters -}; - -constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { -    return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); -} +    const auto& rect{params.GetRect()}; +    size_t buffer_offset = +        (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format); -constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { -    return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); -} +    state.UnbindTexture(texture.handle); +    state.draw.read_framebuffer = read_fb_handle; +    state.Apply(); -/// Get the best surface match (and its match type) for the given flags -template <MatchFlags find_flags> -Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, -                  ScaleMatch match_scale_type, -                  boost::optional<SurfaceInterval> validate_interval = boost::none) { -    Surface match_surface = nullptr; -    bool match_valid = false; -    u32 match_scale = 0; -    SurfaceInterval match_interval{}; - -    for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { -        for (auto& surface : pair.second) { -            bool res_scale_matched = match_scale_type == ScaleMatch::Exact -                                         ? (params.res_scale == surface->res_scale) -                                         : (params.res_scale <= surface->res_scale); -            // validity will be checked in GetCopyableInterval -            bool is_valid = -                (find_flags & MatchFlags::Copy) != MatchFlags::None -                    ? true -                    : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - -            if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) -                continue; - -            auto IsMatch_Helper = [&](auto check_type, auto match_fn) { -                if ((find_flags & check_type) == MatchFlags::None) -                    return; - -                bool matched; -                SurfaceInterval surface_interval; -                std::tie(matched, surface_interval) = match_fn(); -                if (!matched) -                    return; - -                if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && -                    surface->type != SurfaceType::Fill) -                    return; - -                // Found a match, update only if this is better than the previous one -                auto UpdateMatch = [&] { -                    match_surface = surface; -                    match_valid = is_valid; -                    match_scale = surface->res_scale; -                    match_interval = surface_interval; -                }; - -                if (surface->res_scale > match_scale) { -                    UpdateMatch(); -                    return; -                } else if (surface->res_scale < match_scale) { -                    return; -                } - -                if (is_valid && !match_valid) { -                    UpdateMatch(); -                    return; -                } else if (is_valid != match_valid) { -                    return; -                } - -                if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { -                    UpdateMatch(); -                } -            }; -            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { -                return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); -            }); -            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { -                return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); -            }); -            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { -                auto copy_interval = -                    params.FromInterval(*validate_interval).GetCopyableInterval(surface); -                bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && -                               surface->CanCopy(params, copy_interval); -                return std::make_pair(matched, copy_interval); -            }); -            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { -                return std::make_pair(surface->CanExpand(params), surface->GetInterval()); -            }); -            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { -                return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); -            }); -        } +    if (params.type == SurfaceType::ColorTexture) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, +                               texture.handle, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, +                               0); +    } else if (params.type == SurfaceType::Depth) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, +                               texture.handle, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +    } else { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                               texture.handle, 0);      } -    return match_surface; +    glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), +                 static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), +                 tuple.format, tuple.type, &gl_buffer[buffer_offset]); + +    glPixelStorei(GL_PACK_ROW_LENGTH, 0);  }  RasterizerCacheOpenGL::RasterizerCacheOpenGL() {      read_framebuffer.Create();      draw_framebuffer.Create(); - -    attributeless_vao.Create(); - -    d24s8_abgr_buffer.Create(); -    d24s8_abgr_buffer_size = 0; - -    const char* vs_source = R"( -#version 330 core -const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); -void main() { -    gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); -} -)"; -    const char* fs_source = R"( -#version 330 core - -uniform samplerBuffer tbo; -uniform vec2 tbo_size; -uniform vec4 viewport; - -out vec4 color; - -void main() { -    vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; -    int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); -    color = texelFetch(tbo, tbo_offset).rabg; -} -)"; -    d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); - -    OpenGLState state = OpenGLState::GetCurState(); -    GLuint old_program = state.draw.shader_program; -    state.draw.shader_program = d24s8_abgr_shader.handle; -    state.Apply(); - -    GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); -    ASSERT(tbo_u_id != -1); -    glUniform1i(tbo_u_id, 0); - -    state.draw.shader_program = old_program; -    state.Apply(); - -    d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); -    ASSERT(d24s8_abgr_tbo_size_u_id != -1); -    d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); -    ASSERT(d24s8_abgr_viewport_u_id != -1);  }  RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { -    FlushAll(); -    while (!surface_cache.empty()) -        UnregisterSurface(*surface_cache.begin()->second.begin()); -} - -bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, -                                         const MathUtil::Rectangle<u32>& src_rect, -                                         const Surface& dst_surface, -                                         const MathUtil::Rectangle<u32>& dst_rect) { -    if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) -        return false; - -    return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, -                        dst_rect, src_surface->type, read_framebuffer.handle, -                        draw_framebuffer.handle); -} - -void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, -                                               const MathUtil::Rectangle<u32>& src_rect, -                                               GLuint dst_tex, -                                               const MathUtil::Rectangle<u32>& dst_rect) { -    OpenGLState prev_state = OpenGLState::GetCurState(); -    SCOPE_EXIT({ prev_state.Apply(); }); - -    OpenGLState state; -    state.draw.read_framebuffer = read_framebuffer.handle; -    state.draw.draw_framebuffer = draw_framebuffer.handle; -    state.Apply(); - -    glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); - -    GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; -    if (target_pbo_size > d24s8_abgr_buffer_size) { -        d24s8_abgr_buffer_size = target_pbo_size * 2; -        glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); -    } - -    glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -    glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, -                           0); -    glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom), -                 static_cast<GLsizei>(src_rect.GetWidth()), -                 static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, -                 0); - -    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - -    // PBO now contains src_tex in RABG format -    state.draw.shader_program = d24s8_abgr_shader.handle; -    state.draw.vertex_array = attributeless_vao.handle; -    state.viewport.x = static_cast<GLint>(dst_rect.left); -    state.viewport.y = static_cast<GLint>(dst_rect.bottom); -    state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth()); -    state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight()); -    state.Apply(); - -    OGLTexture tbo; -    tbo.Create(); -    glActiveTexture(GL_TEXTURE0); -    glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); -    glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); - -    glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()), -                static_cast<GLfloat>(src_rect.GetHeight())); -    glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x), -                static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width), -                static_cast<GLfloat>(state.viewport.height)); - -    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); -    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); -    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - -    glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, -                                          bool load_if_create) { -    if (params.addr == 0 || params.height * params.width == 0) { -        return nullptr; -    } -    // Use GetSurfaceSubRect instead -    ASSERT(params.width == params.stride); - -    // Check for an exact match in existing surfaces -    Surface surface = -        FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); - -    if (surface == nullptr) { -        u16 target_res_scale = params.res_scale; -        if (match_res_scale != ScaleMatch::Exact) { -            // This surface may have a subrect of another surface with a higher res_scale, find it -            // to adjust our params -            SurfaceParams find_params = params; -            Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( -                surface_cache, find_params, match_res_scale); -            if (expandable != nullptr && expandable->res_scale > target_res_scale) { -                target_res_scale = expandable->res_scale; -            } -        } -        SurfaceParams new_params = params; -        new_params.res_scale = target_res_scale; -        surface = CreateSurface(new_params); -        RegisterSurface(surface); +    while (!surface_cache.empty()) { +        UnregisterSurface(surface_cache.begin()->second);      } - -    if (load_if_create) { -        ValidateSurface(surface, params.addr, params.size); -    } - -    return surface; -} - -boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( -    VAddr cpu_addr) const { -    // Tries to find the GPU address of a framebuffer based on the CPU address. This is because -    // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU -    // addresses. We iterate through all cached framebuffers, and compare their starting CPU address -    // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps -    // surfaces. - -    std::vector<Tegra::GPUVAddr> gpu_addresses; -    for (const auto& pair : surface_cache) { -        for (const auto& surface : pair.second) { -            const VAddr surface_cpu_addr = surface->GetCpuAddr(); -            if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { -                ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); -                gpu_addresses.push_back(surface->addr); -            } -        } -    } - -    if (gpu_addresses.empty()) { -        return {}; -    } - -    ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); -    return gpu_addresses[0]; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, -                                                           ScaleMatch match_res_scale, -                                                           bool load_if_create) { -    if (params.addr == 0 || params.height * params.width == 0) { -        return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{}); -    } - -    // Attempt to find encompassing surface -    Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, -                                                                           match_res_scale); - -    // Check if FindMatch failed because of res scaling -    // If that's the case create a new surface with -    // the dimensions of the lower res_scale surface -    // to suggest it should not be used again -    if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { -        surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, -                                                                       ScaleMatch::Ignore); -        if (surface != nullptr) { -            ASSERT(surface->res_scale < params.res_scale); -            SurfaceParams new_params = *surface; -            new_params.res_scale = params.res_scale; - -            surface = CreateSurface(new_params); -            RegisterSurface(surface); -        } -    } - -    SurfaceParams aligned_params = params; -    if (params.is_tiled) { -        aligned_params.height = Common::AlignUp(params.height, 8); -        aligned_params.width = Common::AlignUp(params.width, 8); -        aligned_params.stride = Common::AlignUp(params.stride, 8); -        aligned_params.UpdateParams(); -    } - -    // Check for a surface we can expand before creating a new one -    if (surface == nullptr) { -        surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params, -                                                                      match_res_scale); -        if (surface != nullptr) { -            aligned_params.width = aligned_params.stride; -            aligned_params.UpdateParams(); - -            SurfaceParams new_params = *surface; -            new_params.addr = std::min(aligned_params.addr, surface->addr); -            new_params.end = std::max(aligned_params.end, surface->end); -            new_params.size = new_params.end - new_params.addr; -            new_params.height = static_cast<u32>( -                new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); -            ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - -            Surface new_surface = CreateSurface(new_params); -            DuplicateSurface(surface, new_surface); - -            // Delete the expanded surface, this can't be done safely yet -            // because it may still be in use -            remove_surfaces.emplace(surface); - -            surface = new_surface; -            RegisterSurface(new_surface); -        } -    } - -    // No subrect found - create and return a new surface -    if (surface == nullptr) { -        SurfaceParams new_params = aligned_params; -        // Can't have gaps in a surface -        new_params.width = aligned_params.stride; -        new_params.UpdateParams(); -        // GetSurface will create the new surface and possibly adjust res_scale if necessary -        surface = GetSurface(new_params, match_res_scale, load_if_create); -    } else if (load_if_create) { -        ValidateSurface(surface, aligned_params.addr, aligned_params.size); -    } - -    return std::make_tuple(surface, surface->GetScaledSubRect(params));  }  Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { -    auto& gpu = Core::System::GetInstance().GPU(); - -    SurfaceParams params; -    params.addr = config.tic.Address(); -    params.is_tiled = config.tic.IsTiled(); -    params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); - -    params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) / -                   params.GetCompresssionFactor(); -    params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) / -                    params.GetCompresssionFactor(); - -    // TODO(Subv): Different types per component are not supported. -    ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && -           config.tic.r_type.Value() == config.tic.b_type.Value() && -           config.tic.r_type.Value() == config.tic.a_type.Value()); - -    params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); - -    if (config.tic.IsTiled()) { -        params.block_height = config.tic.BlockHeight(); - -        // TODO(bunnei): The below align up is a hack. This is here because some compressed textures -        // are not a multiple of their own compression factor, and so this accounts for that. This -        // could potentially result in an extra row of 4px being decoded if a texture is not a -        // multiple of 4. -        params.width = Common::AlignUp(params.width, 4); -        params.height = Common::AlignUp(params.height, 4); -    } else { -        // Use the texture-provided stride value if the texture isn't tiled. -        params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); -    } - -    params.UpdateParams(); - -    return GetSurface(params, ScaleMatch::Ignore, true); +    return GetSurface(SurfaceParams::CreateForTexture(config));  }  SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(      bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {      const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; -    const auto& config = regs.rt[0];      // TODO(bunnei): This is hard corded to use just the first render buffer      NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); -    // update resolution_scale_factor and reset cache if changed -    // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We -    // need to fix this before making the renderer multi-threaded. -    static u16 resolution_scale_factor = GetResolutionScaleFactor(); -    if (resolution_scale_factor != GetResolutionScaleFactor()) { -        resolution_scale_factor = GetResolutionScaleFactor(); -        FlushAll(); -        while (!surface_cache.empty()) -            UnregisterSurface(*surface_cache.begin()->second.begin()); -    } - -    MathUtil::Rectangle<u32> viewport_clamped{ -        static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), -        static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), -        static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))), -        static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; -      // get color and depth surfaces -    SurfaceParams color_params; -    color_params.is_tiled = true; -    color_params.res_scale = resolution_scale_factor; -    color_params.width = config.width; -    color_params.height = config.height; -    // TODO(Subv): Can framebuffers use a different block height? -    color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; -    SurfaceParams depth_params = color_params; - -    color_params.addr = config.Address(); -    color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); -    color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); -    color_params.UpdateParams(); +    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])}; +    const SurfaceParams depth_params{color_params};      ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); -    // depth_params.addr = config.GetDepthBufferPhysicalAddress(); -    // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); -    // depth_params.UpdateParams(); - -    auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); -    auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - -    // Make sure that framebuffers don't overlap if both color and depth are being used -    if (using_color_fb && using_depth_fb && -        boost::icl::length(color_vp_interval & depth_vp_interval)) { -        NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " -                                      "overlapping framebuffers not supported!"); -        using_depth_fb = false; -    }      MathUtil::Rectangle<u32> color_rect{}; -    Surface color_surface = nullptr; -    if (using_color_fb) -        std::tie(color_surface, color_rect) = -            GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); +    Surface color_surface; +    if (using_color_fb) { +        color_surface = GetSurface(color_params); +        if (color_surface) { +            color_rect = color_surface->GetSurfaceParams().GetRect(); +        } +    }      MathUtil::Rectangle<u32> depth_rect{}; -    Surface depth_surface = nullptr; -    if (using_depth_fb) -        std::tie(depth_surface, depth_rect) = -            GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); +    Surface depth_surface; +    if (using_depth_fb) { +        depth_surface = GetSurface(depth_params); +        if (depth_surface) { +            depth_rect = depth_surface->GetSurfaceParams().GetRect(); +        } +    }      MathUtil::Rectangle<u32> fb_rect{}; -    if (color_surface != nullptr && depth_surface != nullptr) { +    if (color_surface && depth_surface) {          fb_rect = color_rect;          // Color and Depth surfaces must have the same dimensions and offsets          if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||              color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { -            color_surface = GetSurface(color_params, ScaleMatch::Exact, false); -            depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); -            fb_rect = color_surface->GetScaledRect(); +            color_surface = GetSurface(color_params); +            depth_surface = GetSurface(depth_params); +            fb_rect = color_surface->GetSurfaceParams().GetRect();          } -    } else if (color_surface != nullptr) { +    } else if (color_surface) {          fb_rect = color_rect; -    } else if (depth_surface != nullptr) { +    } else if (depth_surface) {          fb_rect = depth_rect;      } -    if (color_surface != nullptr) { -        ValidateSurface(color_surface, boost::icl::first(color_vp_interval), -                        boost::icl::length(color_vp_interval)); -    } -    if (depth_surface != nullptr) { -        ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), -                        boost::icl::length(depth_vp_interval)); -    } -      return std::make_tuple(color_surface, depth_surface, fb_rect);  } -Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { -    UNREACHABLE(); -    return {}; +void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { +    surface->LoadGLBuffer(); +    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);  } -SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { -    MathUtil::Rectangle<u32> rect{}; - -    Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( -        surface_cache, params, ScaleMatch::Ignore); - -    if (match_surface != nullptr) { -        ValidateSurface(match_surface, params.addr, params.size); - -        SurfaceParams match_subrect; -        if (params.width != params.stride) { -            const u32 tiled_size = match_surface->is_tiled ? 8 : 1; -            match_subrect = params; -            match_subrect.width = -                static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size); -            match_subrect.stride = -                static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size); -            match_subrect.height *= tiled_size; -        } else { -            match_subrect = match_surface->FromInterval(params.GetInterval()); -            ASSERT(match_subrect.GetInterval() == params.GetInterval()); -        } - -        rect = match_surface->GetScaledSubRect(match_subrect); +void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { +    if (Settings::values.use_accurate_framebuffers) { +        // If enabled, always flush dirty surfaces +        surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); +        surface->FlushGLBuffer(); +    } else { +        // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads +        // and flushes are very slow and do not seem to improve accuracy +        const auto& params{surface->GetSurfaceParams()}; +        Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);      } - -    return std::make_tuple(match_surface, rect);  } -void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, -                                             const Surface& dest_surface) { -    ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - -    BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, -                 dest_surface->GetScaledSubRect(*src_surface)); - -    dest_surface->invalid_regions -= src_surface->GetInterval(); -    dest_surface->invalid_regions += src_surface->invalid_regions; +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { +    if (params.addr == 0 || params.height * params.width == 0) { +        return {}; +    } -    SurfaceRegions regions; -    for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { -        if (pair.second == src_surface) { -            regions += pair.first; +    // Check for an exact match in existing surfaces +    const auto& surface_key{SurfaceKey::Create(params)}; +    const auto& search{surface_cache.find(surface_key)}; +    Surface surface; +    if (search != surface_cache.end()) { +        surface = search->second; +        if (Settings::values.use_accurate_framebuffers) { +            // Reload the surface from Switch memory +            LoadSurface(surface);          } +    } else { +        surface = std::make_shared<CachedSurface>(params); +        RegisterSurface(surface); +        LoadSurface(surface);      } -    for (auto& interval : regions) { -        dirty_regions.set({interval, dest_surface}); -    } -} - -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, -                                            u64 size) { -    if (size == 0) -        return; - -    const SurfaceInterval validate_interval(addr, addr + size); -    if (surface->type == SurfaceType::Fill) { -        // Sanity check, fill surfaces will always be valid when used -        ASSERT(surface->IsRegionValid(validate_interval)); -        return; -    } +    return surface; +} -    while (true) { -        const auto it = surface->invalid_regions.find(validate_interval); -        if (it == surface->invalid_regions.end()) -            break; - -        const auto interval = *it & validate_interval; -        // Look for a valid surface to copy from -        SurfaceParams params = *surface; - -        Surface copy_surface = -            FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); -        if (copy_surface != nullptr) { -            SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); -            CopySurface(copy_surface, surface, copy_interval); -            surface->invalid_regions.erase(copy_interval); -            continue; +Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { +    // Tries to find the GPU address of a framebuffer based on the CPU address. This is because +    // final output framebuffers are specified by CPU address, but internally our GPU cache uses +    // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU +    // address to the one provided. This is obviously not great, and won't work if the +    // framebuffer overlaps surfaces. + +    std::vector<Surface> surfaces; +    for (const auto& surface : surface_cache) { +        const auto& params = surface.second->GetSurfaceParams(); +        const VAddr surface_cpu_addr = params.GetCpuAddr(); +        if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { +            ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); +            surfaces.push_back(surface.second);          } - -        // Load data from Switch memory -        FlushRegion(params.addr, params.size); -        surface->LoadGLBuffer(params.addr, params.end); -        surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, -                                 draw_framebuffer.handle); -        surface->invalid_regions.erase(params.GetInterval());      } -} -void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { -    if (size == 0) -        return; +    if (surfaces.empty()) { +        return {}; +    } -    const SurfaceInterval flush_interval(addr, addr + size); -    SurfaceRegions flushed_intervals; +    ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported"); -    for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { -        // small sizes imply that this most likely comes from the cpu, flush the entire region -        // the point is to avoid thousands of small writes every frame if the cpu decides to access -        // that region, anything higher than 8 you're guaranteed it comes from a service -        const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; -        auto& surface = pair.second; +    return surfaces[0]; +} -        if (flush_surface != nullptr && surface != flush_surface) -            continue; +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { +    // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should +    // probably implement this in the future, but for now, the `use_accurate_framebufers` setting +    // can be used to always flush. +} -        // Sanity check, this surface is the last one that marked this region dirty -        ASSERT(surface->IsRegionValid(interval)); +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { +    for (const auto& pair : surface_cache) { +        const auto& surface{pair.second}; +        const auto& params{surface->GetSurfaceParams()}; -        if (surface->type != SurfaceType::Fill) { -            SurfaceParams params = surface->FromInterval(interval); -            surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, -                                       draw_framebuffer.handle); +        if (params.IsOverlappingRegion(addr, size)) { +            UnregisterSurface(surface);          } -        surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); -        flushed_intervals += interval;      } -    // Reset dirty regions -    dirty_regions -= flushed_intervals;  } -void RasterizerCacheOpenGL::FlushAll() { -    FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); -} +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { +    const auto& params{surface->GetSurfaceParams()}; +    const auto& surface_key{SurfaceKey::Create(params)}; +    const auto& search{surface_cache.find(surface_key)}; -void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, -                                             const Surface& region_owner) { -    if (size == 0) +    if (search != surface_cache.end()) { +        // Registered already          return; - -    const SurfaceInterval invalid_interval(addr, addr + size); - -    if (region_owner != nullptr) { -        ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); -        // Surfaces can't have a gap -        ASSERT(region_owner->width == region_owner->stride); -        region_owner->invalid_regions.erase(invalid_interval);      } -    for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { -        for (auto& cached_surface : pair.second) { -            if (cached_surface == region_owner) -                continue; - -            // If cpu is invalidating this region we want to remove it -            // to (likely) mark the memory pages as uncached -            if (region_owner == nullptr && size <= 8) { -                FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); -                remove_surfaces.emplace(cached_surface); -                continue; -            } - -            const auto interval = cached_surface->GetInterval() & invalid_interval; -            cached_surface->invalid_regions.insert(interval); - -            // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures -            if (cached_surface->type == SurfaceType::Fill && -                cached_surface->IsSurfaceFullyInvalid()) { -                remove_surfaces.emplace(cached_surface); -            } -        } -    } - -    if (region_owner != nullptr) -        dirty_regions.set({invalid_interval, region_owner}); -    else -        dirty_regions.erase(invalid_interval); - -    for (auto& remove_surface : remove_surfaces) { -        if (remove_surface == region_owner) { -            Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( -                surface_cache, *region_owner, ScaleMatch::Ignore); -            ASSERT(expanded_surface); - -            if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { -                DuplicateSurface(region_owner, expanded_surface); -            } else { -                continue; -            } -        } -        UnregisterSurface(remove_surface); -    } - -    remove_surfaces.clear(); +    surface_cache[surface_key] = surface; +    UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);  } -Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { -    Surface surface = std::make_shared<CachedSurface>(); -    static_cast<SurfaceParams&>(*surface) = params; - -    surface->texture.Create(); - -    surface->gl_buffer_size = 0; -    surface->invalid_regions.insert(surface->GetInterval()); -    AllocateSurfaceTexture(surface->texture.handle, -                           GetFormatTuple(surface->pixel_format, surface->component_type), -                           surface->GetScaledWidth(), surface->GetScaledHeight()); - -    return surface; -} +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { +    const auto& params{surface->GetSurfaceParams()}; +    const auto& surface_key{SurfaceKey::Create(params)}; +    const auto& search{surface_cache.find(surface_key)}; -void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { -    if (surface->registered) { +    if (search == surface_cache.end()) { +        // Unregistered already          return;      } -    surface->registered = true; -    surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); -    UpdatePagesCachedCount(surface->addr, surface->size, 1); + +    UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); +    surface_cache.erase(search);  } -void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { -    if (!surface->registered) { -        return; -    } -    surface->registered = false; -    UpdatePagesCachedCount(surface->addr, surface->size, -1); -    surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +template <typename Map, typename Interval> +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { +    return boost::make_iterator_range(map.equal_range(interval));  }  void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9da945e19..85e7c8888 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -1,57 +1,26 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included.  #pragma once  #include <array> +#include <map>  #include <memory> -#include <set> -#include <tuple> -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-local-typedefs" -#endif +#include <vector>  #include <boost/icl/interval_map.hpp> -#include <boost/icl/interval_set.hpp> -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#include <boost/optional.hpp> -#include <glad/glad.h> -#include "common/assert.h" -#include "common/common_funcs.h"  #include "common/common_types.h" +#include "common/hash.h"  #include "common/math_util.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" +#include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  #include "video_core/textures/texture.h" -struct CachedSurface; +class CachedSurface;  using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSet = std::set<Surface>; - -using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; -using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; -using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; - -using SurfaceInterval = SurfaceCache::interval_type; -static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && -                  std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), -              "incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;  using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; -  using PageMap = boost::icl::interval_map<u64, int>; -enum class ScaleMatch { -    Exact,   // only accept same res scale -    Upscale, // only allow higher scale than params -    Ignore   // accept every scaled res -}; -  struct SurfaceParams {      enum class PixelFormat {          ABGR8 = 0, @@ -93,10 +62,10 @@ struct SurfaceParams {      /**       * Gets the compression factor for the specified PixelFormat. This applies to just the       * "compressed width" and "compressed height", not the overall compression factor of a -     * compressed image. This is used for maintaining proper surface sizes for compressed texture -     * formats. +     * compressed image. This is used for maintaining proper surface sizes for compressed +     * texture formats.       */ -    static constexpr u32 GetCompresssionFactor(PixelFormat format) { +    static constexpr u32 GetCompressionFactor(PixelFormat format) {          if (format == PixelFormat::Invalid)              return 0; @@ -112,15 +81,12 @@ struct SurfaceParams {              4, // DXT23              4, // DXT45              4, // DXN1 -            1, // ASTC_2D_4X4 +            4, // ASTC_2D_4X4          }};          ASSERT(static_cast<size_t>(format) < compression_factor_table.size());          return compression_factor_table[static_cast<size_t>(format)];      } -    u32 GetCompresssionFactor() const { -        return GetCompresssionFactor(pixel_format); -    }      static constexpr u32 GetFormatBpp(PixelFormat format) {          if (format == PixelFormat::Invalid) @@ -165,25 +131,6 @@ struct SurfaceParams {          }      } -    static bool IsFormatASTC(PixelFormat format) { -        switch (format) { -        case PixelFormat::ASTC_2D_4X4: -            return true; -        default: -            return false; -        } -    } - -    static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { -        switch (format) { -        case Tegra::FramebufferConfig::PixelFormat::ABGR8: -            return PixelFormat::ABGR8; -        default: -            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); -            UNREACHABLE(); -        } -    } -      static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) {          // TODO(Subv): Properly implement this          switch (format) { @@ -276,36 +223,16 @@ struct SurfaceParams {          }      } -    static ComponentType ComponentTypeFromGPUPixelFormat( -        Tegra::FramebufferConfig::PixelFormat format) { +    static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {          switch (format) {          case Tegra::FramebufferConfig::PixelFormat::ABGR8: -            return ComponentType::UNorm; +            return PixelFormat::ABGR8;          default:              NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));              UNREACHABLE();          }      } -    static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { -        SurfaceType a_type = GetFormatType(pixel_format_a); -        SurfaceType b_type = GetFormatType(pixel_format_b); - -        if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { -            return true; -        } - -        if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { -            return true; -        } - -        if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { -            return true; -        } - -        return false; -    } -      static SurfaceType GetFormatType(PixelFormat pixel_format) {          if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {              return SurfaceType::ColorTexture; @@ -317,168 +244,101 @@ struct SurfaceParams {          return SurfaceType::Invalid;      } -    /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" -    /// and "pixel_format" -    void UpdateParams() { -        if (stride == 0) { -            stride = width; -        } -        type = GetFormatType(pixel_format); -        size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) -                         : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); -        end = addr + size; -    } - -    SurfaceInterval GetInterval() const { -        return SurfaceInterval::right_open(addr, end); -    } - -    // Returns the outer rectangle containing "interval" -    SurfaceParams FromInterval(SurfaceInterval interval) const; - -    SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; - -    // Returns the region of the biggest valid rectange within interval -    SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - -    /** -     * Gets the actual width (in pixels) of the surface. This is provided because `width` is used -     * for tracking the surface region in memory, which may be compressed for certain formats. In -     * this scenario, `width` is actually the compressed width. -     */ -    u32 GetActualWidth() const { -        return width * GetCompresssionFactor(); -    } - -    /** -     * Gets the actual height (in pixels) of the surface. This is provided because `height` is used -     * for tracking the surface region in memory, which may be compressed for certain formats. In -     * this scenario, `height` is actually the compressed height. -     */ -    u32 GetActualHeight() const { -        return height * GetCompresssionFactor(); -    } +    /// Returns the rectangle corresponding to this surface +    MathUtil::Rectangle<u32> GetRect() const; -    u32 GetScaledWidth() const { -        return width * res_scale; +    /// Returns the size of this surface in bytes, adjusted for compression +    size_t SizeInBytes() const { +        const u32 compression_factor{GetCompressionFactor(pixel_format)}; +        ASSERT(width % compression_factor == 0); +        ASSERT(height % compression_factor == 0); +        return (width / compression_factor) * (height / compression_factor) * +               GetFormatBpp(pixel_format) / CHAR_BIT;      } -    u32 GetScaledHeight() const { -        return height * res_scale; -    } +    /// Returns the CPU virtual address for this surface +    VAddr GetCpuAddr() const; -    MathUtil::Rectangle<u32> GetRect() const { -        return {0, height, width, 0}; +    /// Returns true if the specified region overlaps with this surface's region in Switch memory +    bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { +        return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);      } -    MathUtil::Rectangle<u32> GetScaledRect() const { -        return {0, GetScaledHeight(), GetScaledWidth(), 0}; -    } +    /// Creates SurfaceParams from a texture configation +    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + +    /// Creates SurfaceParams from a framebuffer configation +    static SurfaceParams CreateForFramebuffer( +        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + +    Tegra::GPUVAddr addr; +    bool is_tiled; +    u32 block_height; +    PixelFormat pixel_format; +    ComponentType component_type; +    SurfaceType type; +    u32 width; +    u32 height; +    u32 unaligned_height; +    size_t size_in_bytes; +}; -    u64 PixelsInBytes(u64 size) const { -        return size * CHAR_BIT / GetFormatBpp(pixel_format); +/// Hashable variation of SurfaceParams, used for a key in the surface cache +struct SurfaceKey : Common::HashableStruct<SurfaceParams> { +    static SurfaceKey Create(const SurfaceParams& params) { +        SurfaceKey res; +        res.state = params; +        return res;      } +}; -    u64 BytesInPixels(u64 pixels) const { -        return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; +namespace std { +template <> +struct hash<SurfaceKey> { +    size_t operator()(const SurfaceKey& k) const { +        return k.Hash();      } - -    VAddr GetCpuAddr() const; - -    bool ExactMatch(const SurfaceParams& other_surface) const; -    bool CanSubRect(const SurfaceParams& sub_surface) const; -    bool CanExpand(const SurfaceParams& expanded_surface) const; -    bool CanTexCopy(const SurfaceParams& texcopy_params) const; - -    MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; -    MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; - -    Tegra::GPUVAddr addr = 0; -    Tegra::GPUVAddr end = 0; -    boost::optional<VAddr> cpu_addr; -    u64 size = 0; - -    u32 width = 0; -    u32 height = 0; -    u32 stride = 0; -    u32 block_height = 0; -    u16 res_scale = 1; - -    bool is_tiled = false; -    PixelFormat pixel_format = PixelFormat::Invalid; -    SurfaceType type = SurfaceType::Invalid; -    ComponentType component_type = ComponentType::Invalid;  }; +} // namespace std -struct CachedSurface : SurfaceParams { -    bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; -    bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - -    bool IsRegionValid(SurfaceInterval interval) const { -        return (invalid_regions.find(interval) == invalid_regions.end()); -    } +class CachedSurface final { +public: +    CachedSurface(const SurfaceParams& params); -    bool IsSurfaceFullyInvalid() const { -        return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); +    const OGLTexture& Texture() const { +        return texture;      } -    bool registered = false; -    SurfaceRegions invalid_regions; - -    u64 fill_size = 0; /// Number of bytes to read from fill_data -    std::array<u8, 4> fill_data; - -    OGLTexture texture; - -    static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { -        if (format == PixelFormat::Invalid) +    static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) { +        if (format == SurfaceParams::PixelFormat::Invalid)              return 0;          return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;      } -    std::unique_ptr<u8[]> gl_buffer; -    size_t gl_buffer_size = 0; +    const SurfaceParams& GetSurfaceParams() const { +        return params; +    }      // Read/Write data in Switch memory to/from gl_buffer -    void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); -    void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); +    void LoadGLBuffer(); +    void FlushGLBuffer();      // Upload/Download data in gl_buffer in/to this surface's texture -    void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, -                         GLuint draw_fb_handle); -    void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, -                           GLuint draw_fb_handle); +    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); +    void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + +private: +    OGLTexture texture; +    std::vector<u8> gl_buffer; +    SurfaceParams params;  }; -class RasterizerCacheOpenGL : NonCopyable { +class RasterizerCacheOpenGL final : NonCopyable {  public:      RasterizerCacheOpenGL();      ~RasterizerCacheOpenGL(); -    /// Blit one surface's texture to another -    bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, -                      const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); - -    void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, -                            GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); - -    /// Copy one surface's region to another -    void CopySurface(const Surface& src_surface, const Surface& dst_surface, -                     SurfaceInterval copy_interval); - -    /// Load a texture from Switch memory to OpenGL and cache it (if not already cached) -    Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, -                       bool load_if_create); - -    /// Tries to find a framebuffer GPU address based on the provided CPU address -    boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; - -    /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from -    /// Switch memory to OpenGL and caches it (if not already cached) -    SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, -                                        bool load_if_create); -      /// Get a surface based on the texture configuration      Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); @@ -486,29 +346,21 @@ public:      SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,                                                      const MathUtil::Rectangle<s32>& viewport); -    /// Get a surface that matches the fill config -    Surface GetFillSurface(const void* config); +    /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory +    void MarkSurfaceAsDirty(const Surface& surface); -    /// Get a surface that matches a "texture copy" display transfer config -    SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); +    /// Tries to find a framebuffer GPU address based on the provided CPU address +    Surface TryFindFramebufferSurface(VAddr cpu_addr) const;      /// Write any cached resources overlapping the region back to memory (if dirty) -    void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); - -    /// Mark region as being invalidated by region_owner (nullptr if Switch memory) -    void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); +    void FlushRegion(Tegra::GPUVAddr addr, size_t size); -    /// Flush all cached resources tracked by this cache manager -    void FlushAll(); +    /// Mark the specified region as being invalidated +    void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);  private: -    void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); - -    /// Update surface's texture for given region when necessary -    void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); - -    /// Create a new surface -    Surface CreateSurface(const SurfaceParams& params); +    void LoadSurface(const Surface& surface); +    Surface GetSurface(const SurfaceParams& params);      /// Register surface into the cache      void RegisterSurface(const Surface& surface); @@ -519,18 +371,9 @@ private:      /// Increase/decrease the number of surface in pages touching the specified region      void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); -    SurfaceCache surface_cache; +    std::unordered_map<SurfaceKey, Surface> surface_cache;      PageMap cached_pages; -    SurfaceMap dirty_regions; -    SurfaceSet remove_surfaces;      OGLFramebuffer read_framebuffer;      OGLFramebuffer draw_framebuffer; - -    OGLVertexArray attributeless_vao; -    OGLBuffer d24s8_abgr_buffer; -    GLsizeiptr d24s8_abgr_buffer_size; -    OGLProgram d24s8_abgr_shader; -    GLint d24s8_abgr_tbo_size_u_id; -    GLint d24s8_abgr_viewport_u_id;  }; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f33766bfd..e3bb2cbb8 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf                                           screen_info)) {          // Reset the screen info's display texture to its own permanent texture          screen_info.display_texture = screen_info.texture.resource.handle; -        screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);          Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,                                               Memory::FlushMode::Flush); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 2cc6d9a00..21f0d298c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -27,7 +27,7 @@ struct TextureInfo {  /// Structure used for storing information about the display target for the Switch screen  struct ScreenInfo {      GLuint display_texture; -    MathUtil::Rectangle<float> display_texcoords; +    const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};      TextureInfo texture;  }; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 8316db708..cd7986efa 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -84,6 +84,8 @@ void Config::ReadValues() {      qt_config->beginGroup("Renderer");      Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();      Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); +    Settings::values.use_accurate_framebuffers = +        qt_config->value("use_accurate_framebuffers", false).toBool();      Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();      Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); @@ -184,6 +186,7 @@ void Config::SaveValues() {      qt_config->beginGroup("Renderer");      qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);      qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit); +    qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers);      // Cast to double because Qt's written float values are not human-readable      qt_config->setValue("bg_red", (double)Settings::values.bg_red); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 47b9b6e95..7664880d5 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -59,11 +59,13 @@ void ConfigureGraphics::setConfiguration() {      ui->resolution_factor_combobox->setCurrentIndex(          static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));      ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit); +    ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);  }  void ConfigureGraphics::applyConfiguration() {      Settings::values.resolution_factor =          ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));      Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked(); +    Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();      Settings::Apply();  } diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 366931a9a..7d092df03 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -30,6 +30,13 @@           </widget>          </item>          <item> +         <widget class="QCheckBox" name="use_accurate_framebuffers"> +          <property name="text"> +           <string>Use accurate framebuffers (slow)</string> +          </property> +         </widget> +        </item> +        <item>           <layout class="QHBoxLayout" name="horizontalLayout">            <item>             <widget class="QLabel" name="label"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index ee6e4d658..150915c17 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -98,6 +98,8 @@ void Config::ReadValues() {          (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);      Settings::values.toggle_framelimit =          sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); +    Settings::values.use_accurate_framebuffers = +        sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false);      Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);      Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 1c438c3f5..5896971d4 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -102,6 +102,10 @@ resolution_factor =  # 0 (default): Off, 1: On  use_vsync = +# Whether to use accurate framebuffers +# 0 (default): Off (fast), 1 : On (slow) +use_accurate_framebuffers = +  # The clear color for the renderer. What shows up on the sides of the bottom screen.  # Must be in range of 0.0-1.0. Defaults to 1.0 for all.  bg_red =  | 
