diff options
| author | bunnei <bunneidev@gmail.com> | 2020-12-29 23:20:09 -0800 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-12-29 23:20:09 -0800 | 
| commit | d5fe722a30947c5c808717ad8940077e95a3a81c (patch) | |
| tree | f6f5d6d6379b0404147969e7d1f548ed3d49ca01 | |
| parent | 85cfd96f62177338de78d5ca6d7fa4eda0a3728b (diff) | |
| parent | 9764c13d6d2977903f407761b27d847c0056e1c4 (diff) | |
Merge pull request #4967 from ReinUsesLisp/new-texcache
video_core/texture_cache: Rewrite the texture cache
165 files changed, 10939 insertions, 7990 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5b73724ce..948e167c3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -25,6 +25,7 @@ add_library(video_core STATIC      command_classes/vic.h      compatible_formats.cpp      compatible_formats.h +    delayed_destruction_ring.h      dirty_flags.cpp      dirty_flags.h      dma_pusher.cpp @@ -84,14 +85,10 @@ add_library(video_core STATIC      renderer_opengl/gl_device.h      renderer_opengl/gl_fence_manager.cpp      renderer_opengl/gl_fence_manager.h -    renderer_opengl/gl_framebuffer_cache.cpp -    renderer_opengl/gl_framebuffer_cache.h      renderer_opengl/gl_rasterizer.cpp      renderer_opengl/gl_rasterizer.h      renderer_opengl/gl_resource_manager.cpp      renderer_opengl/gl_resource_manager.h -    renderer_opengl/gl_sampler_cache.cpp -    renderer_opengl/gl_sampler_cache.h      renderer_opengl/gl_shader_cache.cpp      renderer_opengl/gl_shader_cache.h      renderer_opengl/gl_shader_decompiler.cpp @@ -113,8 +110,10 @@ add_library(video_core STATIC      renderer_opengl/maxwell_to_gl.h      renderer_opengl/renderer_opengl.cpp      renderer_opengl/renderer_opengl.h -    renderer_opengl/utils.cpp -    renderer_opengl/utils.h +    renderer_opengl/util_shaders.cpp +    renderer_opengl/util_shaders.h +    renderer_vulkan/blit_image.cpp +    renderer_vulkan/blit_image.h      renderer_vulkan/fixed_pipeline_state.cpp      renderer_vulkan/fixed_pipeline_state.h      renderer_vulkan/maxwell_to_vk.cpp @@ -141,8 +140,6 @@ add_library(video_core STATIC      renderer_vulkan/vk_fence_manager.h      renderer_vulkan/vk_graphics_pipeline.cpp      renderer_vulkan/vk_graphics_pipeline.h -    renderer_vulkan/vk_image.cpp -    renderer_vulkan/vk_image.h      renderer_vulkan/vk_master_semaphore.cpp      renderer_vulkan/vk_master_semaphore.h      renderer_vulkan/vk_memory_manager.cpp @@ -153,12 +150,8 @@ add_library(video_core STATIC      renderer_vulkan/vk_query_cache.h     
 renderer_vulkan/vk_rasterizer.cpp      renderer_vulkan/vk_rasterizer.h -    renderer_vulkan/vk_renderpass_cache.cpp -    renderer_vulkan/vk_renderpass_cache.h      renderer_vulkan/vk_resource_pool.cpp      renderer_vulkan/vk_resource_pool.h -    renderer_vulkan/vk_sampler_cache.cpp -    renderer_vulkan/vk_sampler_cache.h      renderer_vulkan/vk_scheduler.cpp      renderer_vulkan/vk_scheduler.h      renderer_vulkan/vk_shader_decompiler.cpp @@ -179,8 +172,6 @@ add_library(video_core STATIC      renderer_vulkan/vk_update_descriptor.h      renderer_vulkan/wrapper.cpp      renderer_vulkan/wrapper.h -    sampler_cache.cpp -    sampler_cache.h      shader_cache.h      shader_notify.cpp      shader_notify.h @@ -237,19 +228,32 @@ add_library(video_core STATIC      shader/transform_feedback.h      surface.cpp      surface.h +    texture_cache/accelerated_swizzle.cpp +    texture_cache/accelerated_swizzle.h +    texture_cache/decode_bc4.cpp +    texture_cache/decode_bc4.h +    texture_cache/descriptor_table.h +    texture_cache/formatter.cpp +    texture_cache/formatter.h      texture_cache/format_lookup_table.cpp      texture_cache/format_lookup_table.h -    texture_cache/surface_base.cpp -    texture_cache/surface_base.h -    texture_cache/surface_params.cpp -    texture_cache/surface_params.h -    texture_cache/surface_view.cpp -    texture_cache/surface_view.h +    texture_cache/image_base.cpp +    texture_cache/image_base.h +    texture_cache/image_info.cpp +    texture_cache/image_info.h +    texture_cache/image_view_base.cpp +    texture_cache/image_view_base.h +    texture_cache/image_view_info.cpp +    texture_cache/image_view_info.h +    texture_cache/render_targets.h +    texture_cache/samples_helper.h +    texture_cache/slot_vector.h      texture_cache/texture_cache.h +    texture_cache/types.h +    texture_cache/util.cpp +    texture_cache/util.h      textures/astc.cpp      textures/astc.h -    textures/convert.cpp -    textures/convert.h      
textures/decoders.cpp      textures/decoders.h      textures/texture.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 38961f3fd..83b9ee871 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -118,20 +118,17 @@ public:      /// Prepares the buffer cache for data uploading      /// @param max_size Maximum number of bytes that will be uploaded      /// @return True when a stream buffer invalidation was required, false otherwise -    bool Map(std::size_t max_size) { +    void Map(std::size_t max_size) {          std::lock_guard lock{mutex}; -        bool invalidated; -        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); +        std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);          buffer_offset = buffer_offset_base; - -        return invalidated;      }      /// Finishes the upload stream      void Unmap() {          std::lock_guard lock{mutex}; -        stream_buffer->Unmap(buffer_offset - buffer_offset_base); +        stream_buffer.Unmap(buffer_offset - buffer_offset_base);      }      /// Function called at the end of each frame, inteded for deferred operations @@ -261,9 +258,9 @@ public:  protected:      explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,                           Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, -                         std::unique_ptr<StreamBuffer> stream_buffer_) +                         StreamBuffer& stream_buffer_)          : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, -          stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} +          stream_buffer{stream_buffer_} {}      ~BufferCache() = default; @@ -441,7 +438,7 @@ private:          buffer_ptr += size;          buffer_offset += size; -        return 
BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; +        return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};      }      void AlignBuffer(std::size_t alignment) { @@ -567,9 +564,7 @@ private:      VideoCore::RasterizerInterface& rasterizer;      Tegra::MemoryManager& gpu_memory;      Core::Memory::Memory& cpu_memory; - -    std::unique_ptr<StreamBuffer> stream_buffer; -    BufferType stream_buffer_handle; +    StreamBuffer& stream_buffer;      u8* buffer_ptr = nullptr;      u64 buffer_offset = 0; diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e21ce9c..aa8c9f9de 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -9,7 +9,7 @@  #include "video_core/engines/maxwell_3d.h"  #include "video_core/gpu.h"  #include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/decoders.h"  extern "C" {  #include <libswscale/swscale.h> @@ -105,9 +105,9 @@ void Vic::Execute() {              const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,                                                              block_height, 0);              std::vector<u8> swizzled_data(size); -            Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, -                                             swizzled_data.data(), converted_frame_buffer.get(), -                                             false, block_height, 0, 1); +            Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, +                                           frame->width, 4, swizzled_data.data(), +                                           converted_frame_buffer.get(), block_height, 0, 0);              gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);              
gpu.Maxwell3D().OnMemoryWrite(); diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index b06c32c84..1619d8664 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -3,9 +3,9 @@  // Refer to the license.txt file included.  #include <array> -#include <bitset>  #include <cstddef> +#include "common/common_types.h"  #include "video_core/compatible_formats.h"  #include "video_core/surface.h" @@ -13,23 +13,25 @@ namespace VideoCore::Surface {  namespace { +using Table = std::array<std::array<u64, 2>, MaxPixelFormat>; +  // Compatibility table taken from Table 3.X.2 in:  // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt -constexpr std::array VIEW_CLASS_128_BITS = { +constexpr std::array VIEW_CLASS_128_BITS{      PixelFormat::R32G32B32A32_FLOAT,      PixelFormat::R32G32B32A32_UINT,      PixelFormat::R32G32B32A32_SINT,  }; -constexpr std::array VIEW_CLASS_96_BITS = { +constexpr std::array VIEW_CLASS_96_BITS{      PixelFormat::R32G32B32_FLOAT,  };  // Missing formats:  // PixelFormat::RGB32UI,  // PixelFormat::RGB32I, -constexpr std::array VIEW_CLASS_64_BITS = { +constexpr std::array VIEW_CLASS_64_BITS{      PixelFormat::R32G32_FLOAT,       PixelFormat::R32G32_UINT,      PixelFormat::R32G32_SINT,        PixelFormat::R16G16B16A16_FLOAT,      PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, @@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {  // TODO: How should we handle 48 bits? 
-constexpr std::array VIEW_CLASS_32_BITS = { +constexpr std::array VIEW_CLASS_32_BITS{      PixelFormat::R16G16_FLOAT,      PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,      PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT,     PixelFormat::R32_UINT,      PixelFormat::R16G16_SINT,       PixelFormat::R32_SINT,        PixelFormat::A8B8G8R8_UNORM, @@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {  // TODO: How should we handle 24 bits? -constexpr std::array VIEW_CLASS_16_BITS = { +constexpr std::array VIEW_CLASS_16_BITS{      PixelFormat::R16_FLOAT,  PixelFormat::R8G8_UINT,  PixelFormat::R16_UINT,      PixelFormat::R16_SINT,   PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,      PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM,  PixelFormat::R8G8_SINT,  }; -constexpr std::array VIEW_CLASS_8_BITS = { +constexpr std::array VIEW_CLASS_8_BITS{      PixelFormat::R8_UINT,      PixelFormat::R8_UNORM,      PixelFormat::R8_SINT,      PixelFormat::R8_SNORM,  }; -constexpr std::array VIEW_CLASS_RGTC1_RED = { +constexpr std::array VIEW_CLASS_RGTC1_RED{      PixelFormat::BC4_UNORM,      PixelFormat::BC4_SNORM,  }; -constexpr std::array VIEW_CLASS_RGTC2_RG = { +constexpr std::array VIEW_CLASS_RGTC2_RG{      PixelFormat::BC5_UNORM,      PixelFormat::BC5_SNORM,  }; -constexpr std::array VIEW_CLASS_BPTC_UNORM = { +constexpr std::array VIEW_CLASS_BPTC_UNORM{      PixelFormat::BC7_UNORM,      PixelFormat::BC7_SRGB,  }; -constexpr std::array VIEW_CLASS_BPTC_FLOAT = { +constexpr std::array VIEW_CLASS_BPTC_FLOAT{      PixelFormat::BC6H_SFLOAT,      PixelFormat::BC6H_UFLOAT,  }; +constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{ +    PixelFormat::ASTC_2D_4X4_UNORM, +    PixelFormat::ASTC_2D_4X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{ +    PixelFormat::ASTC_2D_5X4_UNORM, +    PixelFormat::ASTC_2D_5X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{ +    PixelFormat::ASTC_2D_5X5_UNORM, +    PixelFormat::ASTC_2D_5X5_SRGB, +}; + 
+constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{ +    PixelFormat::ASTC_2D_6X5_UNORM, +    PixelFormat::ASTC_2D_6X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{ +    PixelFormat::ASTC_2D_6X6_UNORM, +    PixelFormat::ASTC_2D_6X6_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{ +    PixelFormat::ASTC_2D_8X5_UNORM, +    PixelFormat::ASTC_2D_8X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{ +    PixelFormat::ASTC_2D_8X8_UNORM, +    PixelFormat::ASTC_2D_8X8_SRGB, +}; + +// Missing formats: +// PixelFormat::ASTC_2D_10X5_UNORM +// PixelFormat::ASTC_2D_10X5_SRGB + +// Missing formats: +// PixelFormat::ASTC_2D_10X6_UNORM +// PixelFormat::ASTC_2D_10X6_SRGB + +constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ +    PixelFormat::ASTC_2D_10X8_UNORM, +    PixelFormat::ASTC_2D_10X8_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{ +    PixelFormat::ASTC_2D_10X10_UNORM, +    PixelFormat::ASTC_2D_10X10_SRGB, +}; + +// Missing formats +// ASTC_2D_12X10_UNORM, +// ASTC_2D_12X10_SRGB, + +constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ +    PixelFormat::ASTC_2D_12X12_UNORM, +    PixelFormat::ASTC_2D_12X12_SRGB, +}; +  // Compatibility table taken from Table 4.X.1 in:  // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt -constexpr std::array COPY_CLASS_128_BITS = { +constexpr std::array COPY_CLASS_128_BITS{      PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,      PixelFormat::BC2_UNORM,         PixelFormat::BC2_SRGB,           PixelFormat::BC3_UNORM,      PixelFormat::BC3_SRGB,          PixelFormat::BC5_UNORM,          PixelFormat::BC5_SNORM, @@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {  // PixelFormat::RGBA32I  // COMPRESSED_RG_RGTC2 -constexpr std::array COPY_CLASS_64_BITS = { +constexpr std::array COPY_CLASS_64_BITS{      PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,      PixelFormat::R16G16B16A16_UNORM, 
PixelFormat::R16G16B16A16_SNORM,      PixelFormat::R16G16B16A16_SINT,  PixelFormat::R32G32_UINT, @@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {  // COMPRESSED_RGBA_S3TC_DXT1_EXT  // COMPRESSED_SIGNED_RED_RGTC1 -void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { -    compatiblity[format_a][format_b] = true; -    compatiblity[format_b][format_a] = true; +constexpr void Enable(Table& table, size_t format_a, size_t format_b) { +    table[format_a][format_b / 64] |= u64(1) << (format_b % 64); +    table[format_b][format_a / 64] |= u64(1) << (format_a % 64);  } -void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { -    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); +constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) { +    Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));  }  template <typename Range> -void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { +constexpr void EnableRange(Table& table, const Range& range) {      for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {          for (auto it_b = it_a; it_b != range.end(); ++it_b) { -            Enable(compatibility, *it_a, *it_b); +            Enable(table, *it_a, *it_b);          }      }  } -} // Anonymous namespace +constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) { +    const size_t a = static_cast<size_t>(format_a); +    const size_t b = static_cast<size_t>(format_b); +    return ((table[a][b / 64] >> (b % 64)) & 1) != 0; +} -FormatCompatibility::FormatCompatibility() { +constexpr Table MakeViewTable() { +    Table view{};      for (size_t i = 0; i < MaxPixelFormat; ++i) {          // Identity is allowed          Enable(view, i, i);      } -      EnableRange(view, VIEW_CLASS_128_BITS);      EnableRange(view, VIEW_CLASS_96_BITS);      
EnableRange(view, VIEW_CLASS_64_BITS); @@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {      EnableRange(view, VIEW_CLASS_RGTC2_RG);      EnableRange(view, VIEW_CLASS_BPTC_UNORM);      EnableRange(view, VIEW_CLASS_BPTC_FLOAT); +    EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); +    EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); +    return view; +} -    copy = view; +constexpr Table MakeCopyTable() { +    Table copy = MakeViewTable();      EnableRange(copy, COPY_CLASS_128_BITS);      EnableRange(copy, COPY_CLASS_64_BITS); +    return copy; +} + +} // Anonymous namespace + +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) { +    static constexpr Table TABLE = MakeViewTable(); +    return IsSupported(TABLE, format_a, format_b); +} + +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { +    static constexpr Table TABLE = MakeCopyTable(); +    return IsSupported(TABLE, format_a, format_b);  }  } // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index 51766349b..b5eb03bea 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -4,31 +4,12 @@  #pragma once -#include <array> -#include <bitset> -#include <cstddef> -  #include "video_core/surface.h"  namespace VideoCore::Surface { -class FormatCompatibility { -public: -    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>; - -    explicit FormatCompatibility(); - -    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { -        
return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; -    } - -    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { -        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; -    } +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b); -private: -    Table view; -    Table copy; -}; +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);  } // namespace VideoCore::Surface diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h new file mode 100644 index 000000000..4f1d29c04 --- /dev/null +++ b/src/video_core/delayed_destruction_ring.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstddef> +#include <utility> +#include <vector> + +namespace VideoCommon { + +/// Container to push objects to be destroyed a few ticks in the future +template <typename T, size_t TICKS_TO_DESTROY> +class DelayedDestructionRing { +public: +    void Tick() { +        index = (index + 1) % TICKS_TO_DESTROY; +        elements[index].clear(); +    } + +    void Push(T&& object) { +        elements[index].push_back(std::move(object)); +    } + +private: +    size_t index = 0; +    std::array<std::vector<T>, TICKS_TO_DESTROY> elements; +}; + +} // namespace VideoCommon diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 2faa6ef0e..b1eaac00c 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {  using Tegra::Engines::Maxwell3D;  void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { +    FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); +    FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); +      static constexpr std::size_t num_per_rt = NUM(rt[0]);      static 
constexpr std::size_t begin = OFF(rt);      static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; @@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl          FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);      }      FillBlock(tables[1], begin, num, RenderTargets); +    FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets); + +    tables[0][OFF(rt_control)] = RenderTargets; +    tables[1][OFF(rt_control)] = RenderTargetControl;      static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};      for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 3f6c1d83a..875527ddd 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {  enum : u8 {      NullEntry = 0, +    Descriptors, +      RenderTargets, +    RenderTargetControl,      ColorBuffer0,      ColorBuffer1,      ColorBuffer2, diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 4293d676c..a01d334ad 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,11 @@  namespace Tegra::Engines { -Fermi2D::Fermi2D() = default; +Fermi2D::Fermi2D() { +    // Nvidia's OpenGL driver seems to assume these values +    regs.src.depth = 1; +    regs.dst.depth = 1; +}  Fermi2D::~Fermi2D() = default; @@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {  void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {      ASSERT_MSG(method < Regs::NUM_REGS,                 "Invalid Fermi2D register, increase the size of the Regs structure"); -      regs.reg_array[method] = method_argument; -    switch (method) { -    // Trigger the surface copy on the last register write. 
This is blit_src_y, but this is 64-bit, -    // so trigger on the second 32-bit write. -    case FERMI2D_REG_INDEX(blit_src_y) + 1: { -        HandleSurfaceCopy(); -        break; -    } +    if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { +        Blit();      }  }  void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { -    for (std::size_t i = 0; i < amount; i++) { -        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); +    for (u32 i = 0; i < amount; ++i) { +        CallMethod(method, base_start[i], methods_pending - i <= 1);      }  } -static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { -    const u32 line_a = src_2 - src_1; -    const u32 line_b = dst_2 - dst_1; -    const u32 excess = std::max<s32>(0, line_a - src_line + src_1); -    return {line_b - (excess * line_b) / line_a, excess}; -} - -void Fermi2D::HandleSurfaceCopy() { -    LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); +void Fermi2D::Blit() { +    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", +              regs.src.Address(), regs.dst.Address()); -    // TODO(Subv): Only raw copies are implemented. 
-    ASSERT(regs.operation == Operation::SrcCopy); +    UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); +    UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero"); +    UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero"); +    UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one"); +    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); -    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; -    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; -    u32 src_blit_x2, src_blit_y2; -    if (regs.blit_control.origin == Origin::Corner) { -        src_blit_x2 = -            static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); -        src_blit_y2 = -            static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); -    } else { -        src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width); -        src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height); -    } -    u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; -    u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; -    const auto [new_dst_w, src_excess_x] = -        DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); -    const auto [new_dst_h, src_excess_y] = -        DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); -    dst_blit_x2 = new_dst_w + regs.blit_dst_x; -    src_blit_x2 = src_blit_x2 - src_excess_x; -    dst_blit_y2 = new_dst_h + regs.blit_dst_y; -    src_blit_y2 = src_blit_y2 - src_excess_y; -    const auto [new_src_w, dst_excess_x] = -        DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); -    const auto [new_src_h, dst_excess_y] = -        DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); -    
src_blit_x2 = new_src_w + src_blit_x1; -    dst_blit_x2 = dst_blit_x2 - dst_excess_x; -    src_blit_y2 = new_src_h + src_blit_y1; -    dst_blit_y2 = dst_blit_y2 - dst_excess_y; -    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; -    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, -                                          dst_blit_y2}; -    const Config copy_config{ +    const auto& args = regs.pixels_from_memory; +    const Config config{          .operation = regs.operation, -        .filter = regs.blit_control.filter, -        .src_rect = src_rect, -        .dst_rect = dst_rect, +        .filter = args.sample_mode.filter, +        .dst_x0 = args.dst_x0, +        .dst_y0 = args.dst_y0, +        .dst_x1 = args.dst_x0 + args.dst_width, +        .dst_y1 = args.dst_y0 + args.dst_height, +        .src_x0 = static_cast<s32>(args.src_x0 >> 32), +        .src_y0 = static_cast<s32>(args.src_y0 >> 32), +        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), +        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),      }; -    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { +    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {          UNIMPLEMENTED();      }  } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0909709ec..81522988e 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -53,8 +53,8 @@ public:      };      enum class Filter : u32 { -        PointSample = 0, // Nearest -        Linear = 1, +        Point = 0, +        Bilinear = 1,      };      enum class Operation : u32 { @@ -67,88 +67,235 @@ public:          BlendPremult = 6,      }; -    struct Regs { -        static constexpr std::size_t NUM_REGS = 0x258; +    enum class MemoryLayout : u32 { +        BlockLinear = 0, +        Pitch = 1, +    
}; -        struct Surface { -            RenderTargetFormat format; -            BitField<0, 1, u32> linear; -            union { -                BitField<0, 4, u32> block_width; -                BitField<4, 4, u32> block_height; -                BitField<8, 4, u32> block_depth; -            }; -            u32 depth; -            u32 layer; -            u32 pitch; -            u32 width; -            u32 height; -            u32 address_high; -            u32 address_low; - -            GPUVAddr Address() const { -                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | -                                             address_low); -            } - -            u32 BlockWidth() const { -                return block_width.Value(); -            } - -            u32 BlockHeight() const { -                return block_height.Value(); -            } - -            u32 BlockDepth() const { -                return block_depth.Value(); -            } -        }; -        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); +    enum class CpuIndexWrap : u32 { +        Wrap = 0, +        NoWrap = 1, +    }; +    struct Surface { +        RenderTargetFormat format; +        MemoryLayout linear;          union { -            struct { -                INSERT_UNION_PADDING_WORDS(0x80); +            BitField<0, 4, u32> block_width; +            BitField<4, 4, u32> block_height; +            BitField<8, 4, u32> block_depth; +        }; +        u32 depth; +        u32 layer; +        u32 pitch; +        u32 width; +        u32 height; +        u32 addr_upper; +        u32 addr_lower; + +        [[nodiscard]] constexpr GPUVAddr Address() const noexcept { +            return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower); +        } +    }; +    static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); -                Surface dst; +    enum class SectorPromotion : u32 { +        
NoPromotion = 0, +        PromoteTo2V = 1, +        PromoteTo2H = 2, +        PromoteTo4 = 3, +    }; + +    enum class NumTpcs : u32 { +        All = 0, +        One = 1, +    }; -                INSERT_UNION_PADDING_WORDS(2); +    enum class RenderEnableMode : u32 { +        False = 0, +        True = 1, +        Conditional = 2, +        RenderIfEqual = 3, +        RenderIfNotEqual = 4, +    }; -                Surface src; +    enum class ColorKeyFormat : u32 { +        A16R56G6B5 = 0, +        A1R5G55B5 = 1, +        A8R8G8B8 = 2, +        A2R10G10B10 = 3, +        Y8 = 4, +        Y16 = 5, +        Y32 = 6, +    }; -                INSERT_UNION_PADDING_WORDS(0x15); +    union Beta4 { +        BitField<0, 8, u32> b; +        BitField<8, 8, u32> g; +        BitField<16, 8, u32> r; +        BitField<24, 8, u32> a; +    }; -                Operation operation; +    struct Point { +        u32 x; +        u32 y; +    }; -                INSERT_UNION_PADDING_WORDS(0x177); +    enum class PatternSelect : u32 { +        MonoChrome8x8 = 0, +        MonoChrome64x1 = 1, +        MonoChrome1x64 = 2, +        Color = 3, +    }; +    enum class NotifyType : u32 { +        WriteOnly = 0, +        WriteThenAwaken = 1, +    }; + +    enum class MonochromePatternColorFormat : u32 { +        A8X8R8G6B5 = 0, +        A1R5G5B5 = 1, +        A8R8G8B8 = 2, +        A8Y8 = 3, +        A8X8Y16 = 4, +        Y32 = 5, +    }; + +    enum class MonochromePatternFormat : u32 { +        CGA6_M1 = 0, +        LE_M1 = 1, +    }; + +    union Regs { +        static constexpr std::size_t NUM_REGS = 0x258; +        struct { +            u32 object; +            INSERT_UNION_PADDING_WORDS(0x3F); +            u32 no_operation; +            NotifyType notify; +            INSERT_UNION_PADDING_WORDS(0x2); +            u32 wait_for_idle; +            INSERT_UNION_PADDING_WORDS(0xB); +            u32 pm_trigger; +            INSERT_UNION_PADDING_WORDS(0xF); +            u32 context_dma_notify; +     
       u32 dst_context_dma; +            u32 src_context_dma; +            u32 semaphore_context_dma; +            INSERT_UNION_PADDING_WORDS(0x1C); +            Surface dst; +            CpuIndexWrap pixels_from_cpu_index_wrap; +            u32 kind2d_check_enable; +            Surface src; +            SectorPromotion pixels_from_memory_sector_promotion; +            INSERT_UNION_PADDING_WORDS(0x1); +            NumTpcs num_tpcs; +            u32 render_enable_addr_upper; +            u32 render_enable_addr_lower; +            RenderEnableMode render_enable_mode; +            INSERT_UNION_PADDING_WORDS(0x4); +            u32 clip_x0; +            u32 clip_y0; +            u32 clip_width; +            u32 clip_height; +            BitField<0, 1, u32> clip_enable; +            BitField<0, 3, ColorKeyFormat> color_key_format; +            u32 color_key; +            BitField<0, 1, u32> color_key_enable; +            BitField<0, 8, u32> rop; +            u32 beta1; +            Beta4 beta4; +            Operation operation; +            union { +                BitField<0, 6, u32> x; +                BitField<8, 6, u32> y; +            } pattern_offset; +            BitField<0, 2, PatternSelect> pattern_select; +            INSERT_UNION_PADDING_WORDS(0xC); +            struct { +                BitField<0, 3, MonochromePatternColorFormat> color_format; +                BitField<0, 1, MonochromePatternFormat> format; +                u32 color0; +                u32 color1; +                u32 pattern0; +                u32 pattern1; +            } monochrome_pattern; +            struct { +                std::array<u32, 0x40> X8R8G8B8; +                std::array<u32, 0x20> R5G6B5; +                std::array<u32, 0x20> X1R5G5B5; +                std::array<u32, 0x10> Y8; +            } color_pattern; +            INSERT_UNION_PADDING_WORDS(0x10); +            struct { +                u32 prim_mode; +                u32 prim_color_format; +                u32 
prim_color; +                u32 line_tie_break_bits; +                INSERT_UNION_PADDING_WORDS(0x14); +                u32 prim_point_xy; +                INSERT_UNION_PADDING_WORDS(0x7); +                std::array<Point, 0x40> prim_point; +            } render_solid; +            struct { +                u32 data_type; +                u32 color_format; +                u32 index_format; +                u32 mono_format; +                u32 wrap; +                u32 color0; +                u32 color1; +                u32 mono_opacity; +                INSERT_UNION_PADDING_WORDS(0x6); +                u32 src_width; +                u32 src_height; +                u32 dx_du_frac; +                u32 dx_du_int; +                u32 dx_dv_frac; +                u32 dy_dv_int; +                u32 dst_x0_frac; +                u32 dst_x0_int; +                u32 dst_y0_frac; +                u32 dst_y0_int; +                u32 data; +            } pixels_from_cpu; +            INSERT_UNION_PADDING_WORDS(0x3); +            u32 big_endian_control; +            INSERT_UNION_PADDING_WORDS(0x3); +            struct { +                BitField<0, 3, u32> block_shape; +                BitField<0, 5, u32> corral_size; +                BitField<0, 1, u32> safe_overlap;                  union { -                    u32 raw;                      BitField<0, 1, Origin> origin;                      BitField<4, 1, Filter> filter; -                } blit_control; - +                } sample_mode;                  INSERT_UNION_PADDING_WORDS(0x8); - -                u32 blit_dst_x; -                u32 blit_dst_y; -                u32 blit_dst_width; -                u32 blit_dst_height; -                u64 blit_du_dx; -                u64 blit_dv_dy; -                u64 blit_src_x; -                u64 blit_src_y; - -                INSERT_UNION_PADDING_WORDS(0x21); -            }; -            std::array<u32, NUM_REGS> reg_array; +                s32 dst_x0; +         
       s32 dst_y0; +                s32 dst_width; +                s32 dst_height; +                s64 du_dx; +                s64 dv_dy; +                s64 src_x0; +                s64 src_y0; +            } pixels_from_memory;          }; +        std::array<u32, NUM_REGS> reg_array;      } regs{};      struct Config { -        Operation operation{}; -        Filter filter{}; -        Common::Rectangle<u32> src_rect; -        Common::Rectangle<u32> dst_rect; +        Operation operation; +        Filter filter; +        s32 dst_x0; +        s32 dst_y0; +        s32 dst_x1; +        s32 dst_y1; +        s32 src_x0; +        s32 src_y0; +        s32 src_x1; +        s32 src_y1;      };  private: @@ -156,25 +303,49 @@ private:      /// Performs the copy from the source surface to the destination surface as configured in the      /// registers. -    void HandleSurfaceCopy(); +    void Blit();  };  #define ASSERT_REG_POSITION(field_name, position)                                                  \ -    static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4,                             \ +    static_assert(offsetof(Fermi2D::Regs, field_name) == position,                                 \                    "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(dst, 0x80); -ASSERT_REG_POSITION(src, 0x8C); -ASSERT_REG_POSITION(operation, 0xAB); -ASSERT_REG_POSITION(blit_control, 0x223); -ASSERT_REG_POSITION(blit_dst_x, 0x22c); -ASSERT_REG_POSITION(blit_dst_y, 0x22d); -ASSERT_REG_POSITION(blit_dst_width, 0x22e); -ASSERT_REG_POSITION(blit_dst_height, 0x22f); -ASSERT_REG_POSITION(blit_du_dx, 0x230); -ASSERT_REG_POSITION(blit_dv_dy, 0x232); -ASSERT_REG_POSITION(blit_src_x, 0x234); -ASSERT_REG_POSITION(blit_src_y, 0x236); +ASSERT_REG_POSITION(object, 0x0); +ASSERT_REG_POSITION(no_operation, 0x100); +ASSERT_REG_POSITION(notify, 0x104); +ASSERT_REG_POSITION(wait_for_idle, 0x110); +ASSERT_REG_POSITION(pm_trigger, 0x140); 
+ASSERT_REG_POSITION(context_dma_notify, 0x180); +ASSERT_REG_POSITION(dst_context_dma, 0x184); +ASSERT_REG_POSITION(src_context_dma, 0x188); +ASSERT_REG_POSITION(semaphore_context_dma, 0x18C); +ASSERT_REG_POSITION(dst, 0x200); +ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228); +ASSERT_REG_POSITION(kind2d_check_enable, 0x22C); +ASSERT_REG_POSITION(src, 0x230); +ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258); +ASSERT_REG_POSITION(num_tpcs, 0x260); +ASSERT_REG_POSITION(render_enable_addr_upper, 0x264); +ASSERT_REG_POSITION(render_enable_addr_lower, 0x268); +ASSERT_REG_POSITION(clip_x0, 0x280); +ASSERT_REG_POSITION(clip_y0, 0x284); +ASSERT_REG_POSITION(clip_width, 0x288); +ASSERT_REG_POSITION(clip_height, 0x28c); +ASSERT_REG_POSITION(clip_enable, 0x290); +ASSERT_REG_POSITION(color_key_format, 0x294); +ASSERT_REG_POSITION(color_key, 0x298); +ASSERT_REG_POSITION(rop, 0x2A0); +ASSERT_REG_POSITION(beta1, 0x2A4); +ASSERT_REG_POSITION(beta4, 0x2A8); +ASSERT_REG_POSITION(operation, 0x2AC); +ASSERT_REG_POSITION(pattern_offset, 0x2B0); +ASSERT_REG_POSITION(pattern_select, 0x2B4); +ASSERT_REG_POSITION(monochrome_pattern, 0x2E8); +ASSERT_REG_POSITION(color_pattern, 0x300); +ASSERT_REG_POSITION(render_solid, 0x580); +ASSERT_REG_POSITION(pixels_from_cpu, 0x800); +ASSERT_REG_POSITION(big_endian_control, 0x870); +ASSERT_REG_POSITION(pixels_from_memory, 0x880);  #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun      }  } -Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { -    const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); -    ASSERT(cbuf_mask[regs.tex_cb_index]); - -    const auto& texinfo = 
launch_description.const_buffer_config[regs.tex_cb_index]; -    ASSERT(texinfo.Address() != 0); - -    const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); -    ASSERT(address < texinfo.Address() + texinfo.size); - -    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; -    return GetTextureInfo(tex_handle); -} - -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { -    return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} -  u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {      ASSERT(stage == ShaderType::Compute);      const auto& buffer = launch_description.const_buffer_config[const_buffer]; @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con  SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {      const Texture::TextureHandle tex_handle{handle}; -    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); -    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); -    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); +    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); +    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + +    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); +    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());      return result;  } diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -209,11 +209,6 @@ public:      void CallMultiMethod(u32 method, const u32* base_start, u32 amount,                           u32 methods_pending) override; -    Texture::FullTextureInfo GetTexture(std::size_t offset) const; - -    /// Given a 
texture handle, returns the TSC and TIC entries. -    Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; -      u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;      SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 761962ed0..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -2,7 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -#include <cinttypes>  #include <cstring>  #include <optional>  #include "common/assert.h" @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume              OnMemoryWrite();          }          return; +    case MAXWELL3D_REG_INDEX(fragment_barrier): +        return rasterizer->FragmentBarrier(); +    case MAXWELL3D_REG_INDEX(tiled_cache_barrier): +        return rasterizer->TiledCacheBarrier();      }  } @@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() {  }  Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { -    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; +    const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};      Texture::TICEntry tic_entry;      memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); @@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {  }  Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { -    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; +    const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};      Texture::TSCEntry tsc_entry;      memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, 
sizeof(Texture::TSCEntry));      return tsc_entry;  } -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { -    return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - -Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { -    const auto stage_index = static_cast<std::size_t>(stage); -    const auto& shader = state.shader_stages[stage_index]; -    const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; -    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - -    const GPUVAddr tex_info_address = -        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - -    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - -    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - -    return GetTextureInfo(tex_handle); -} -  u32 Maxwell3D::GetRegisterValue(u32 method) const {      ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");      return regs.reg_array[method];  }  void Maxwell3D::ProcessClearBuffers() { -    ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && -           regs.clear_buffers.R == regs.clear_buffers.B && -           regs.clear_buffers.R == regs.clear_buffers.A); -      rasterizer->Clear();  } @@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse      ASSERT(stage != ShaderType::Compute);      const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];      const auto& buffer = shader_stage.const_buffers[const_buffer]; -    u32 result; -    std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); -    return result; +    return memory_manager.Read<u32>(buffer.address + offset);  }  SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { @@ -712,9 +689,11 @@ SamplerDescriptor 
Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b  SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {      const Texture::TextureHandle tex_handle{handle}; -    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); -    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); -    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); +    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); +    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + +    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); +    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());      return result;  } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 564acbc53..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -438,16 +438,6 @@ public:              DecrWrapOGL = 0x8508,          }; -        enum class MemoryLayout : u32 { -            Linear = 0, -            BlockLinear = 1, -        }; - -        enum class InvMemoryLayout : u32 { -            BlockLinear = 0, -            Linear = 1, -        }; -          enum class CounterReset : u32 {              SampleCnt = 0x01,              Unk02 = 0x02, @@ -589,21 +579,31 @@ public:              NegativeW = 7,          }; +        enum class SamplerIndex : u32 { +            Independently = 0, +            ViaHeaderIndex = 1, +        }; + +        struct TileMode { +            union { +                BitField<0, 4, u32> block_width; +                BitField<4, 4, u32> block_height; +                BitField<8, 4, u32> block_depth; +                BitField<12, 1, u32> is_pitch_linear; +                BitField<16, 1, u32> is_3d; +            }; +        }; +        static_assert(sizeof(TileMode) == 4); +          struct RenderTargetConfig {              u32 address_high;              u32 address_low;              u32 width;              
u32 height;              Tegra::RenderTargetFormat format; +            TileMode tile_mode;              union { -                BitField<0, 3, u32> block_width; -                BitField<4, 3, u32> block_height; -                BitField<8, 3, u32> block_depth; -                BitField<12, 1, InvMemoryLayout> type; -                BitField<16, 1, u32> is_3d; -            } memory_layout; -            union { -                BitField<0, 16, u32> layers; +                BitField<0, 16, u32> depth;                  BitField<16, 1, u32> volume;              };              u32 layer_stride; @@ -832,7 +832,11 @@ public:                  u32 patch_vertices; -                INSERT_UNION_PADDING_WORDS(0xC); +                INSERT_UNION_PADDING_WORDS(0x4); + +                u32 fragment_barrier; + +                INSERT_UNION_PADDING_WORDS(0x7);                  std::array<ScissorTest, NumViewports> scissor_test; @@ -842,7 +846,15 @@ public:                  u32 stencil_back_mask;                  u32 stencil_back_func_mask; -                INSERT_UNION_PADDING_WORDS(0xC); +                INSERT_UNION_PADDING_WORDS(0x5); + +                u32 invalidate_texture_data_cache; + +                INSERT_UNION_PADDING_WORDS(0x1); + +                u32 tiled_cache_barrier; + +                INSERT_UNION_PADDING_WORDS(0x4);                  u32 color_mask_common; @@ -866,12 +878,7 @@ public:                      u32 address_high;                      u32 address_low;                      Tegra::DepthFormat format; -                    union { -                        BitField<0, 4, u32> block_width; -                        BitField<4, 4, u32> block_height; -                        BitField<8, 4, u32> block_depth; -                        BitField<20, 1, InvMemoryLayout> type; -                    } memory_layout; +                    TileMode tile_mode;                      u32 layer_stride;                      GPUVAddr Address() const { @@ -880,7 +887,18 @@ 
public:                      }                  } zeta; -                INSERT_UNION_PADDING_WORDS(0x41); +                struct { +                    union { +                        BitField<0, 16, u32> x; +                        BitField<16, 16, u32> width; +                    }; +                    union { +                        BitField<0, 16, u32> y; +                        BitField<16, 16, u32> height; +                    }; +                } render_area; + +                INSERT_UNION_PADDING_WORDS(0x3F);                  union {                      BitField<0, 4, u32> stencil; @@ -921,7 +939,7 @@ public:                          BitField<25, 3, u32> map_7;                      }; -                    u32 GetMap(std::size_t index) const { +                    u32 Map(std::size_t index) const {                          const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,                                                                       map_4, map_5, map_6, map_7};                          ASSERT(index < maps.size()); @@ -934,11 +952,13 @@ public:                  u32 zeta_width;                  u32 zeta_height;                  union { -                    BitField<0, 16, u32> zeta_layers; +                    BitField<0, 16, u32> zeta_depth;                      BitField<16, 1, u32> zeta_volume;                  }; -                INSERT_UNION_PADDING_WORDS(0x26); +                SamplerIndex sampler_index; + +                INSERT_UNION_PADDING_WORDS(0x25);                  u32 depth_test_enable; @@ -964,6 +984,7 @@ public:                      float b;                      float a;                  } blend_color; +                  INSERT_UNION_PADDING_WORDS(0x4);                  struct { @@ -1001,7 +1022,12 @@ public:                  float line_width_smooth;                  float line_width_aliased; -                INSERT_UNION_PADDING_WORDS(0x1F); +                INSERT_UNION_PADDING_WORDS(0x1B); + +         
       u32 invalidate_sampler_cache_no_wfi; +                u32 invalidate_texture_header_cache_no_wfi; + +                INSERT_UNION_PADDING_WORDS(0x2);                  u32 vb_element_base;                  u32 vb_base_instance; @@ -1045,13 +1071,13 @@ public:                  } condition;                  struct { -                    u32 tsc_address_high; -                    u32 tsc_address_low; -                    u32 tsc_limit; +                    u32 address_high; +                    u32 address_low; +                    u32 limit; -                    GPUVAddr TSCAddress() const { -                        return static_cast<GPUVAddr>( -                            (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); +                    GPUVAddr Address() const { +                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | +                                                     address_low);                      }                  } tsc; @@ -1062,13 +1088,13 @@ public:                  u32 line_smooth_enable;                  struct { -                    u32 tic_address_high; -                    u32 tic_address_low; -                    u32 tic_limit; +                    u32 address_high; +                    u32 address_low; +                    u32 limit; -                    GPUVAddr TICAddress() const { -                        return static_cast<GPUVAddr>( -                            (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); +                    GPUVAddr Address() const { +                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | +                                                     address_low);                      }                  } tic; @@ -1397,12 +1423,6 @@ public:      void FlushMMEInlineDraw(); -    /// Given a texture handle, returns the TSC and TIC entries. 
-    Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - -    /// Returns the texture information for a specific texture in a specific shader stage. -    Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; -      u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;      SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; @@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);  ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);  ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);  ASSERT_REG_POSITION(patch_vertices, 0x373); +ASSERT_REG_POSITION(fragment_barrier, 0x378);  ASSERT_REG_POSITION(scissor_test, 0x380);  ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);  ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);  ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); +ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);  ASSERT_REG_POSITION(color_mask_common, 0x3E4);  ASSERT_REG_POSITION(depth_bounds, 0x3E7);  ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); @@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);  ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);  ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);  ASSERT_REG_POSITION(zeta, 0x3F8); +ASSERT_REG_POSITION(render_area, 0x3FD);  ASSERT_REG_POSITION(clear_flags, 0x43E);  ASSERT_REG_POSITION(fill_rectangle, 0x44F);  ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);  ASSERT_REG_POSITION(rt_control, 0x487);  ASSERT_REG_POSITION(zeta_width, 0x48a);  ASSERT_REG_POSITION(zeta_height, 0x48b); -ASSERT_REG_POSITION(zeta_layers, 0x48c); +ASSERT_REG_POSITION(zeta_depth, 0x48c); +ASSERT_REG_POSITION(sampler_index, 0x48D);  ASSERT_REG_POSITION(depth_test_enable, 0x4B3);  
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);  ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);  ASSERT_REG_POSITION(screen_y_control, 0x4EB);  ASSERT_REG_POSITION(line_width_smooth, 0x4EC);  ASSERT_REG_POSITION(line_width_aliased, 0x4ED); +ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); +ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);  ASSERT_REG_POSITION(vb_element_base, 0x50D);  ASSERT_REG_POSITION(vb_base_instance, 0x50E);  ASSERT_REG_POSITION(clip_distance_enabled, 0x544); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1c29e895e..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {  }  void MaxwellDMA::CopyBlockLinearToPitch() { +    UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);      UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);      UNIMPLEMENTED_IF(regs.src_params.layer != 0); @@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {  }  void MaxwellDMA::CopyPitchToBlockLinear() { +    UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); +      const auto& dst_params = regs.dst_params;      const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;      const u32 width = dst_params.width; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c5f26896e..3512283ff 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -9,6 +9,7 @@  #include "common/common_types.h"  #include "core/core.h" +#include "video_core/delayed_destruction_ring.h"  #include "video_core/gpu.h"  #include "video_core/memory_manager.h"  #include "video_core/rasterizer_interface.h" @@ -47,6 +48,11 @@ protected:  template <typename TFence, typename TTextureCache, typename TTBufferCache, typename 
TQueryCache>  class FenceManager {  public: +    /// Notify the fence manager about a new frame +    void TickFrame() { +        delayed_destruction_ring.Tick(); +    } +      void SignalSemaphore(GPUVAddr addr, u32 value) {          TryReleasePendingFences();          const bool should_flush = ShouldFlush(); @@ -86,7 +92,7 @@ public:              } else {                  gpu.IncrementSyncPoint(current_fence->GetPayload());              } -            fences.pop(); +            PopFence();          }      } @@ -132,7 +138,7 @@ private:              } else {                  gpu.IncrementSyncPoint(current_fence->GetPayload());              } -            fences.pop(); +            PopFence();          }      } @@ -158,7 +164,14 @@ private:          query_cache.CommitAsyncFlushes();      } +    void PopFence() { +        delayed_destruction_ring.Push(std::move(fences.front())); +        fences.pop(); +    } +      std::queue<TFence> fences; + +    DelayedDestructionRing<TFence, 6> delayed_destruction_ring;  };  } // namespace VideoCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c157724a9..4c7399d5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,8 +1,26 @@ -set(SHADER_SOURCES +set(SHADER_FILES +    block_linear_unswizzle_2d.comp +    block_linear_unswizzle_3d.comp +    convert_depth_to_float.frag +    convert_float_to_depth.frag +    full_screen_triangle.vert +    opengl_copy_bc4.comp      opengl_present.frag      opengl_present.vert +    pitch_unswizzle.comp +    vulkan_blit_color_float.frag +    vulkan_blit_depth_stencil.frag +    vulkan_present.frag +    vulkan_present.vert +    vulkan_quad_array.comp +    vulkan_quad_indexed.comp +    vulkan_uint8.comp  ) +find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) + +set(GLSL_FLAGS "") +  set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)  set(SHADER_DIR 
${SHADER_INCLUDE}/video_core/host_shaders)  set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) @@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)  set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)  set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) -foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) +foreach(FILENAME IN ITEMS ${SHADER_FILES})      string(REPLACE "." "_" SHADER_NAME ${FILENAME})      set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) -    set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) -    add_custom_command( -        OUTPUT -            ${HEADER_FILE} -        COMMAND -            ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} -        MAIN_DEPENDENCY -            ${SOURCE_FILE} -        DEPENDS -            ${INPUT_FILE} -            # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified -    ) -    set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) +    # Skip generating source headers on Vulkan exclusive files +    if (NOT ${FILENAME} MATCHES "vulkan.*") +        set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) +        add_custom_command( +            OUTPUT +                ${SOURCE_HEADER_FILE} +            COMMAND +                ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} +            MAIN_DEPENDENCY +                ${SOURCE_FILE} +            DEPENDS +                ${INPUT_FILE} +                # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified +        ) +        set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) +    endif() +    # Skip compiling to SPIR-V OpenGL exclusive files +    if (NOT ${FILENAME} MATCHES "opengl.*") +        string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME) +        set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h) +        add_custom_command( +        
    OUTPUT +                ${SPIRV_HEADER_FILE} +            COMMAND +                ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} +            MAIN_DEPENDENCY +                ${SOURCE_FILE} +        ) +        set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) +    endif()  endforeach()  add_custom_target(host_shaders      DEPENDS          ${SHADER_HEADERS}      SOURCES -        ${SHADER_SOURCES} +        ${SHADER_FILES}  ) diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint layer_stride; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; 
+END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { +    uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { +    pos = pos & SWIZZLE_MASK; +    return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { +    switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES +    case 0: +        return uvec4(u8data[offset], 0, 0, 0); +    case 1: +        return uvec4(u16data[offset / 2], 0, 0, 0); +#else +    case 0: +        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); +    case 1: +        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif +    case 2: +        return uvec4(u32data[offset / 4], 0, 0, 0); +    case 3: +        return uvec4(u64data[offset / 8], 0, 0); +    case 4: +        return u128data[offset / 16]; +    } +    return uvec4(0); +} + +void main() { 
+    uvec3 pos = gl_GlobalInvocationID + origin; +    pos.x <<= bytes_per_block_log2; + +    // Read as soon as possible due to its latency +    const uint swizzle = SwizzleOffset(pos.xy); + +    const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + +    uint offset = 0; +    offset += pos.z * layer_stride; +    offset += (block_y >> block_height) * block_size; +    offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; +    offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; +    offset += swizzle; + +    const uvec4 texel = ReadTexel(offset); +    const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; +    imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp @@ -0,0 +1,125 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint slice_size; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +UNIFORM(8) uint block_depth; +UNIFORM(9) uint block_depth_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { +    uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; + +layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; 
+const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { +    pos = pos & SWIZZLE_MASK; +    return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { +    switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES +    case 0: +        return uvec4(u8data[offset], 0, 0, 0); +    case 1: +        return uvec4(u16data[offset / 2], 0, 0, 0); +#else +    case 0: +        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); +    case 1: +        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif +    case 2: +        return uvec4(u32data[offset / 4], 0, 0, 0); +    case 3: +        return uvec4(u64data[offset / 8], 0, 0); +    case 4: +        return u128data[offset / 16]; +    } +    return uvec4(0); +} + +void main() { +    uvec3 pos = gl_GlobalInvocationID + origin; +    pos.x <<= bytes_per_block_log2; + +    // Read as soon as possible due to its latency +    const uint swizzle = SwizzleOffset(pos.xy); + +    const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + +    uint offset = 0; +    offset += (pos.z >> block_depth) * slice_size; +    offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); +    offset += (block_y >> block_height) * block_size; +    offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; +    offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; +    offset += swizzle; + +    const uvec4 texel = ReadTexel(offset); +    const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; +    imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag 
b/src/video_core/host_shaders/convert_depth_to_float.frag new file mode 100644 index 000000000..624c58509 --- /dev/null +++ b/src/video_core/host_shaders/convert_depth_to_float.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_texture; +layout(location = 0) out float output_color; + +void main() { +    ivec2 coord = ivec2(gl_FragCoord.xy); +    output_color = texelFetch(depth_texture, coord, 0).r; +} diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag new file mode 100644 index 000000000..d86c795f4 --- /dev/null +++ b/src/video_core/host_shaders/convert_float_to_depth.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { +    ivec2 coord = ivec2(gl_FragCoord.xy); +    float color = texelFetch(color_texture, coord, 0).r; +    gl_FragDepth = color; +} diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..452ad6502 --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +#ifdef VULKAN +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) vec2 tex_scale; +UNIFORM(1) vec2 tex_offset; +END_PUSH_CONSTANTS + +layout(location = 0) out vec2 texcoord; + +void main() { +    float x = float((gl_VertexIndex & 1) << 2); +    float y = float((gl_VertexIndex & 2) << 1); +    gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); +    texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); +} diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp new file mode 100644 index 000000000..7b8e20fbe --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bc4.comp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 core +#extension GL_ARB_gpu_shader_int64 : require + +layout (local_size_x = 4, local_size_y = 4) in; + +layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input; +layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output; + +layout(location = 0) uniform uvec3 src_offset; +layout(location = 1) uniform uvec3 dst_offset; + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +uint DecompressBlock(uint64_t bits, uvec2 coord) { +    const uint code_offset = 16 + 3 * (4 * coord.y + coord.x); +    const uint code = uint(bits >> code_offset) & 7; +    const uint red0 = uint(bits >> 0) & 0xff; +    const uint red1 = uint(bits >> 8) & 0xff; +    if (red0 > red1) { +        switch (code) { +        case 0: +            return red0; +        case 1: +            return red1; +        case 2: +            return (6 * red0 + 1 * red1) / 7; +        case 3: +            return (5 * red0 + 2 * red1) / 7; +        case 4: +            return (4 * red0 + 3 * red1) / 7; +        case 5: +            return (3 * red0 + 4 * red1) / 7; +        case 6: +            return (2 * red0 + 5 * red1) / 7; +        case 7: +            return (1 * red0 + 6 * red1) / 7; +        } +    } else { +        switch (code) { +        case 0: +            return red0; +        case 1: +            return red1; +        case 2: +            return (4 * red0 + 1 * red1) / 5; +        case 3: +            return (3 * red0 + 2 * red1) / 5; +        case 4: +            return (2 * red0 + 3 * red1) / 5; +        case 5: +            return (1 * red0 + 4 * red1) / 5; +        case 6: +            return 0; +        case 7: +            return 0xff; +        } +    } +    return 0; +} + +void main() { +    uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg; +    uint64_t bits = packUint2x32(packed_bits); +    uint red = DecompressBlock(bits, gl_LocalInvocationID.xy); +    uvec4 color = uvec4(red & 0xff, 0, 0, 
0xff); +    imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color); +} diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 8a4cb024b..84b818227 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. +  #version 430 core  layout (location = 0) in vec2 frag_tex_coord; diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert index 2235d31a4..c3b5adbba 100644 --- a/src/video_core/host_shaders/opengl_present.vert +++ b/src/video_core/host_shaders/opengl_present.vert @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. +  #version 430 core  out gl_PerVertex { diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp new file mode 100644 index 000000000..cb48ec170 --- /dev/null +++ b/src/video_core/host_shaders/pitch_unswizzle.comp @@ -0,0 +1,86 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec2 origin; +UNIFORM(1) ivec2 destination; +UNIFORM(2) uint bytes_per_block; +UNIFORM(3) uint pitch; +END_PUSH_CONSTANTS + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +uvec4 ReadTexel(uint offset) { +    switch (bytes_per_block) { +#if HAS_EXTENDED_TYPES +    case 1: +        return uvec4(u8data[offset], 0, 0, 0); +    case 2: +        return uvec4(u16data[offset / 2], 0, 0, 0); +#else +    case 1: +        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); +    case 2: +        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 
8) & 16), 16), 0, 0, 0); +#endif +    case 4: +        return uvec4(u32data[offset / 4], 0, 0, 0); +    case 8: +        return uvec4(u64data[offset / 8], 0, 0); +    case 16: +        return u128data[offset / 16]; +    } +    return uvec4(0); +} + +void main() { +    uvec2 pos = gl_GlobalInvocationID.xy + origin; + +    uint offset = 0; +    offset += pos.x * bytes_per_block; +    offset += pos.y * pitch; + +    const uvec4 texel = ReadTexel(offset); +    const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination; +    imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag new file mode 100644 index 000000000..4a6aae410 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag @@ -0,0 +1,14 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D tex; + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +void main() { +    color = textureLod(tex, texcoord, 0); +} diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..19bb23a5a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) in vec2 texcoord; + +void main() { +    gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; +    gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r; +} diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag index a06ecd24a..0979ff3e6 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.frag +++ b/src/video_core/host_shaders/vulkan_present.frag @@ -2,15 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ -  #version 460 core  layout (location = 0) in vec2 frag_tex_coord; diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert index c64d9235a..00b868958 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.vert +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -2,15 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
-/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ -  #version 460 core  layout (location = 0) in vec2 vert_position; diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp index 5a5703308..212f4e998 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_array.comp +++ b/src/video_core/host_shaders/vulkan_quad_array.comp @@ -2,15 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ -  #version 460 core  layout (local_size_x = 1024) in; diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp index 5a472ba9b..8655591d0 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp @@ -2,15 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -/* - * Build instructions: - * $ glslangValidator -V quad_indexed.comp -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ -  #version 460 core  layout (local_size_x = 1024) in; diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index a320f3ae0..ad74d7af9 100644 --- a/src/video_core/renderer_vulkan/shaders/uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp @@ -2,15 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
-/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ -  #version 460 core  #extension GL_EXT_shader_16bit_storage : require  #extension GL_EXT_shader_8bit_storage : require diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6e70bd362..65feff588 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {      }      // Flush and invalidate through the GPU interface, to be asynchronous if possible. -    system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); +    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); +    ASSERT(cpu_addr); + +    rasterizer->UnmapMemory(*cpu_addr, size);      UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);  } diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9da9fb4ff..e69de29bb 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -1,250 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <array> -#include <cstring> -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/morton.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCore { - -using Surface::GetBytesPerPixel; -using Surface::PixelFormat; - -using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); -using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; - -template <bool morton_to_linear, PixelFormat format> -static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, -                       u32 tile_width_spacing, u8* buffer, u8* addr) { -    constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); - -    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual -    // pixel values. -    constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; -    constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; - -    if constexpr (morton_to_linear) { -        Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, -                                         stride, height, depth, block_height, block_depth, -                                         tile_width_spacing); -    } else { -        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, -                                         (height + tile_size_y - 1) / tile_size_y, depth, -                                         bytes_per_pixel, bytes_per_pixel, addr, buffer, false, -                                         block_height, block_depth, tile_width_spacing); -    } -} - -static constexpr ConversionArray morton_to_linear_fns = { -    MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, -    MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, -    MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, -    MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, -    MortonCopy<true, PixelFormat::R5G6B5_UNORM>, -    
MortonCopy<true, PixelFormat::B5G6R5_UNORM>, -    MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, -    MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, -    MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, -    MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, -    MortonCopy<true, PixelFormat::R8_UNORM>, -    MortonCopy<true, PixelFormat::R8_SNORM>, -    MortonCopy<true, PixelFormat::R8_SINT>, -    MortonCopy<true, PixelFormat::R8_UINT>, -    MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, -    MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, -    MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, -    MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, -    MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, -    MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, -    MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, -    MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, -    MortonCopy<true, PixelFormat::BC2_UNORM>, -    MortonCopy<true, PixelFormat::BC3_UNORM>, -    MortonCopy<true, PixelFormat::BC4_UNORM>, -    MortonCopy<true, PixelFormat::BC4_SNORM>, -    MortonCopy<true, PixelFormat::BC5_UNORM>, -    MortonCopy<true, PixelFormat::BC5_SNORM>, -    MortonCopy<true, PixelFormat::BC7_UNORM>, -    MortonCopy<true, PixelFormat::BC6H_UFLOAT>, -    MortonCopy<true, PixelFormat::BC6H_SFLOAT>, -    MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, -    MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, -    MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, -    MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, -    MortonCopy<true, PixelFormat::R32G32_FLOAT>, -    MortonCopy<true, PixelFormat::R32G32_SINT>, -    MortonCopy<true, PixelFormat::R32_FLOAT>, -    MortonCopy<true, PixelFormat::R16_FLOAT>, -    MortonCopy<true, PixelFormat::R16_UNORM>, -    MortonCopy<true, PixelFormat::R16_SNORM>, -    MortonCopy<true, PixelFormat::R16_UINT>, -    MortonCopy<true, PixelFormat::R16_SINT>, -    MortonCopy<true, PixelFormat::R16G16_UNORM>, -    MortonCopy<true, 
PixelFormat::R16G16_FLOAT>, -    MortonCopy<true, PixelFormat::R16G16_UINT>, -    MortonCopy<true, PixelFormat::R16G16_SINT>, -    MortonCopy<true, PixelFormat::R16G16_SNORM>, -    MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, -    MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, -    MortonCopy<true, PixelFormat::R8G8_UNORM>, -    MortonCopy<true, PixelFormat::R8G8_SNORM>, -    MortonCopy<true, PixelFormat::R8G8_SINT>, -    MortonCopy<true, PixelFormat::R8G8_UINT>, -    MortonCopy<true, PixelFormat::R32G32_UINT>, -    MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, -    MortonCopy<true, PixelFormat::R32_UINT>, -    MortonCopy<true, PixelFormat::R32_SINT>, -    MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, -    MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, -    MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, -    MortonCopy<true, PixelFormat::BC2_SRGB>, -    MortonCopy<true, PixelFormat::BC3_SRGB>, -    MortonCopy<true, PixelFormat::BC7_SRGB>, -    MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, -    MortonCopy<true, 
PixelFormat::ASTC_2D_8X6_SRGB>, -    MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, -    MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, -    MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, -    MortonCopy<true, PixelFormat::D32_FLOAT>, -    MortonCopy<true, PixelFormat::D16_UNORM>, -    MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, -    MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, -    MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, -}; - -static constexpr ConversionArray linear_to_morton_fns = { -    MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, -    MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, -    MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, -    MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, -    MortonCopy<false, PixelFormat::R5G6B5_UNORM>, -    MortonCopy<false, PixelFormat::B5G6R5_UNORM>, -    MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, -    MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, -    MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, -    MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, -    MortonCopy<false, PixelFormat::R8_UNORM>, -    MortonCopy<false, PixelFormat::R8_SNORM>, -    MortonCopy<false, PixelFormat::R8_SINT>, -    MortonCopy<false, PixelFormat::R8_UINT>, -    MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, -    MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, -    MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, -    MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, -    MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, -    MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, -    MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, -    MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, -    MortonCopy<false, PixelFormat::BC2_UNORM>, -    MortonCopy<false, PixelFormat::BC3_UNORM>, -    MortonCopy<false, PixelFormat::BC4_UNORM>, -    MortonCopy<false, PixelFormat::BC4_SNORM>, -    MortonCopy<false, PixelFormat::BC5_UNORM>, -    MortonCopy<false, PixelFormat::BC5_SNORM>, -    MortonCopy<false, 
PixelFormat::BC7_UNORM>, -    MortonCopy<false, PixelFormat::BC6H_UFLOAT>, -    MortonCopy<false, PixelFormat::BC6H_SFLOAT>, -    // TODO(Subv): Swizzling ASTC formats are not supported -    nullptr, -    MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, -    MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, -    MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, -    MortonCopy<false, PixelFormat::R32G32_FLOAT>, -    MortonCopy<false, PixelFormat::R32G32_SINT>, -    MortonCopy<false, PixelFormat::R32_FLOAT>, -    MortonCopy<false, PixelFormat::R16_FLOAT>, -    MortonCopy<false, PixelFormat::R16_UNORM>, -    MortonCopy<false, PixelFormat::R16_SNORM>, -    MortonCopy<false, PixelFormat::R16_UINT>, -    MortonCopy<false, PixelFormat::R16_SINT>, -    MortonCopy<false, PixelFormat::R16G16_UNORM>, -    MortonCopy<false, PixelFormat::R16G16_FLOAT>, -    MortonCopy<false, PixelFormat::R16G16_UINT>, -    MortonCopy<false, PixelFormat::R16G16_SINT>, -    MortonCopy<false, PixelFormat::R16G16_SNORM>, -    MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, -    MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, -    MortonCopy<false, PixelFormat::R8G8_UNORM>, -    MortonCopy<false, PixelFormat::R8G8_SNORM>, -    MortonCopy<false, PixelFormat::R8G8_SINT>, -    MortonCopy<false, PixelFormat::R8G8_UINT>, -    MortonCopy<false, PixelFormat::R32G32_UINT>, -    MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, -    MortonCopy<false, PixelFormat::R32_UINT>, -    MortonCopy<false, PixelFormat::R32_SINT>, -    nullptr, -    nullptr, -    nullptr, -    MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, -    MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, -    MortonCopy<false, PixelFormat::BC2_SRGB>, -    MortonCopy<false, PixelFormat::BC3_SRGB>, -    MortonCopy<false, PixelFormat::BC7_SRGB>, -    MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    
nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    nullptr, -    MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, -    MortonCopy<false, PixelFormat::D32_FLOAT>, -    MortonCopy<false, PixelFormat::D16_UNORM>, -    MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, -    MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, -    MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, -}; - -static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { -    switch (mode) { -    case MortonSwizzleMode::MortonToLinear: -        return morton_to_linear_fns[static_cast<std::size_t>(format)]; -    case MortonSwizzleMode::LinearToMorton: -        return linear_to_morton_fns[static_cast<std::size_t>(format)]; -    } -    UNREACHABLE(); -    return morton_to_linear_fns[static_cast<std::size_t>(format)]; -} - -void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, -                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, -                   u8* buffer, u8* addr) { -    GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, -                                     tile_width_spacing, buffer, addr); -} - -} // namespace VideoCore diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b714a7e3f..e69de29bb 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h @@ -1,18 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" -#include "video_core/surface.h" - -namespace VideoCore { - -enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; - -void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, -                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, -                   u8* buffer, u8* addr); - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 27ef4c69a..0cb0f387d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -76,6 +76,9 @@ public:      /// Sync memory between guest and host.      virtual void SyncGuestHost() = 0; +    /// Unmap memory range +    virtual void UnmapMemory(VAddr addr, u64 size) = 0; +      /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory      /// and invalidated      virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; @@ -83,6 +86,12 @@ public:      /// Notify the host renderer to wait for previous primitive and compute operations.      virtual void WaitForIdle() = 0; +    /// Notify the host renderer to wait for reads and writes to render targets and flush caches. +    virtual void FragmentBarrier() = 0; + +    /// Notify the host renderer to make available previous render target writes. +    virtual void TiledCacheBarrier() = 0; +      /// Notify the rasterizer to send all written commands to the host GPU.      
virtual void FlushCommands() = 0; @@ -91,8 +100,7 @@ public:      /// Attempt to use a faster method to perform a surface copy      [[nodiscard]] virtual bool AccelerateSurfaceCopy( -        const Tegra::Engines::Fermi2D::Regs::Surface& src, -        const Tegra::Engines::Fermi2D::Regs::Surface& dst, +        const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,          const Tegra::Engines::Fermi2D::Config& copy_config) {          return false;      } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 60735d502..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst  OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,                                 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, -                               const Device& device_, std::size_t stream_size_) -    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, -                         std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)}, -      device{device_} { +                               const Device& device_, OGLStreamBuffer& stream_buffer_, +                               StateTracker& state_tracker) +    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {      if (!device.HasFastBufferSubData()) {          return;      } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 95251e26b..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -22,6 +22,7 @@ namespace OpenGL {  class Device;  class OGLStreamBuffer;  class RasterizerOpenGL; +class StateTracker;  
class Buffer : public VideoCommon::BufferBlock {  public: @@ -52,9 +53,10 @@ private:  using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;  class OGLBufferCache final : public GenericBufferCache {  public: -    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, -                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, -                            const Device& device_, std::size_t stream_size_); +    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, +                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, +                            const Device& device, OGLStreamBuffer& stream_buffer, +                            StateTracker& state_tracker);      ~OGLBufferCache();      BufferInfo GetEmptyBuffer(std::size_t) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..b24179d59 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,9 +5,11 @@  #include <algorithm>  #include <array>  #include <cstddef> +#include <cstdlib>  #include <cstring>  #include <limits>  #include <optional> +#include <span>  #include <vector>  #include <glad/glad.h> @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;  constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = { +constexpr std::array LIMIT_UBOS = {      GL_MAX_VERTEX_UNIFORM_BLOCKS,          GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,      GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, -    GL_MAX_FRAGMENT_UNIFORM_BLOCKS,        GL_MAX_COMPUTE_UNIFORM_BLOCKS}; - -constexpr std::array LimitSSBOs = { +    GL_MAX_FRAGMENT_UNIFORM_BLOCKS,        GL_MAX_COMPUTE_UNIFORM_BLOCKS, +}; +constexpr std::array LIMIT_SSBOS = {      GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS,          GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,      
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, -    GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS,        GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; - -constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, -                                      GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, -                                      GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, -                                      GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, -                                      GL_MAX_TEXTURE_IMAGE_UNITS, -                                      GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; - -constexpr std::array LimitImages = { +    GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS,        GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, +}; +constexpr std::array LIMIT_SAMPLERS = { +    GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, +    GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, +    GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, +    GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, +    GL_MAX_TEXTURE_IMAGE_UNITS, +    GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, +}; +constexpr std::array LIMIT_IMAGES = {      GL_MAX_VERTEX_IMAGE_UNIFORMS,          GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,      GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, -    GL_MAX_FRAGMENT_IMAGE_UNIFORMS,        GL_MAX_COMPUTE_IMAGE_UNIFORMS}; +    GL_MAX_FRAGMENT_IMAGE_UNIFORMS,        GL_MAX_COMPUTE_IMAGE_UNIFORMS, +};  template <typename T>  T GetInteger(GLenum pname) { @@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {      return extensions;  } -bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { -    return std::find(images.begin(), images.end(), extension) != images.end(); +bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) { +    return std::ranges::find(extensions, extension) != extensions.end();  }  u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { @@ -91,8 
+95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {  std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {      std::array<u32, Tegra::Engines::MaxShaderTypes> max; -    std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), -                   [](GLenum pname) { return GetInteger<u32>(pname); }); +    std::ranges::transform(LIMIT_UBOS, max.begin(), +                           [](GLenum pname) { return GetInteger<u32>(pname); });      return max;  } @@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin      for (std::size_t i = 0; i < NumStages; ++i) {          const std::size_t stage = stage_swizzle[i];          bindings[stage] = { -            Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), -            Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), -            Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; +            Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), +            Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), +            Extract(base_samplers, num_samplers, total_samplers / NumStages, +                    LIMIT_SAMPLERS[stage])};      }      u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); @@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin      // Reserve at least 4 image bindings on the fragment stage.      bindings[4].image = -        Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); +        Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);      // This is guaranteed to be at least 1.      
const u32 total_extracted_images = num_images / (NumStages - 1); @@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin              continue;          }          bindings[stage].image = -            Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); +            Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);      }      // Compute doesn't care about any of this. @@ -188,6 +193,11 @@ bool IsASTCSupported() {      return true;  } +[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { +    const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); +    return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); +} +  } // Anonymous namespace  Device::Device() @@ -206,9 +216,8 @@ Device::Device()              "Beta driver 443.24 is known to have issues. There might be performance issues.");          disable_fast_buffer_sub_data = true;      } - -    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); -    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); +    uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); +    shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);      max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);      max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);      max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); @@ -224,6 +233,7 @@ Device::Device()      has_precise_bug = TestPreciseBug();      has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;      has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; +    has_debugging_tool_attached = IsDebugToolAttached(extensions);      // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on 
exclusive      // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..13e66846c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -36,11 +36,11 @@ public:          return GetBaseBindings(static_cast<std::size_t>(shader_type));      } -    std::size_t GetUniformBufferAlignment() const { +    size_t GetUniformBufferAlignment() const {          return uniform_buffer_alignment;      } -    std::size_t GetShaderStorageBufferAlignment() const { +    size_t GetShaderStorageBufferAlignment() const {          return shader_storage_alignment;      } @@ -104,6 +104,10 @@ public:          return has_nv_viewport_array2;      } +    bool HasDebuggingToolAttached() const { +        return has_debugging_tool_attached; +    } +      bool UseAssemblyShaders() const {          return use_assembly_shaders;      } @@ -118,8 +122,8 @@ private:      std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};      std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; -    std::size_t uniform_buffer_alignment{}; -    std::size_t shader_storage_alignment{}; +    size_t uniform_buffer_alignment{}; +    size_t shader_storage_alignment{};      u32 max_vertex_attributes{};      u32 max_varyings{};      u32 max_compute_shared_memory_size{}; @@ -135,6 +139,7 @@ private:      bool has_precise_bug{};      bool has_fast_buffer_sub_data{};      bool has_nv_viewport_array2{}; +    bool has_debugging_tool_attached{};      bool use_assembly_shaders{};      bool use_asynchronous_shaders{};  }; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6040646cb..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -46,7 +46,7 @@ void GLInnerFence::Wait() {  }  
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, -                                       Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, +                                       Tegra::GPU& gpu_, TextureCache& texture_cache_,                                         OGLBufferCache& buffer_cache_, QueryCache& query_cache_)      : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 39ca6125b..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -33,12 +33,12 @@ private:  using Fence = std::shared_ptr<GLInnerFence>;  using GenericFenceManager = -    VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; +    VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;  class FenceManagerOpenGL final : public GenericFenceManager {  public:      explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, -                                TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, +                                TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,                                  QueryCache& query_cache_);  protected: diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <tuple> -#include <unordered_map> -#include <utility> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using VideoCore::Surface::SurfaceType; - -FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; - -FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; - -GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { -    const auto [entry, is_cache_miss] = cache.try_emplace(key); -    auto& framebuffer{entry->second}; -    if (is_cache_miss) { -        framebuffer = CreateFramebuffer(key); -    } -    return framebuffer.handle; -} - -OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { -    OGLFramebuffer framebuffer; -    framebuffer.Create(); - -    // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. -    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); - -    if (key.zeta) { -        const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; -        const GLenum attach_target = stencil ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; -        key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); -    } - -    std::size_t num_buffers = 0; -    std::array<GLenum, Maxwell::NumRenderTargets> targets; - -    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { -        if (!key.colors[index]) { -            targets[index] = GL_NONE; -            continue; -        } -        const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index); -        key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - -        const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; -        targets[index] = GL_COLOR_ATTACHMENT0 + attachment; -        num_buffers = index + 1; -    } - -    if (num_buffers > 0) { -        glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets)); -    } else { -        glDrawBuffer(GL_NONE); -    } - -    return framebuffer; -} - -std::size_t FramebufferCacheKey::Hash() const noexcept { -    std::size_t hash = std::hash<View>{}(zeta); -    for (const auto& color : colors) { -        hash ^= std::hash<View>{}(color); -    } -    hash ^= static_cast<std::size_t>(color_attachments) << 16; -    return hash; -} - -bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { -    return std::tie(colors, zeta, color_attachments) == -           std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <array> -#include <cstddef> -#include <unordered_map> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace OpenGL { - -constexpr std::size_t BitsPerAttachment = 4; - -struct FramebufferCacheKey { -    View zeta; -    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors; -    u32 color_attachments = 0; - -    std::size_t Hash() const noexcept; - -    bool operator==(const FramebufferCacheKey& rhs) const noexcept; - -    bool operator!=(const FramebufferCacheKey& rhs) const noexcept { -        return !operator==(rhs); -    } - -    void SetAttachment(std::size_t index, u32 attachment) { -        color_attachments |= attachment << (BitsPerAttachment * index); -    } -}; - -} // namespace OpenGL - -namespace std { - -template <> -struct hash<OpenGL::FramebufferCacheKey> { -    std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { -        return k.Hash(); -    } -}; - -} // namespace std - -namespace OpenGL { - -class FramebufferCacheOpenGL { -public: -    FramebufferCacheOpenGL(); -    ~FramebufferCacheOpenGL(); - -    GLuint GetFramebuffer(const FramebufferCacheKey& key); - -private: -    OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - -    std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e58e84759..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,12 +25,15 @@  #include "video_core/engines/maxwell_3d.h"  #include "video_core/engines/shader_type.h"  #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_device.h"  #include 
"video_core/renderer_opengl/gl_query_cache.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_texture_cache.h"  #include "video_core/renderer_opengl/maxwell_to_gl.h"  #include "video_core/renderer_opengl/renderer_opengl.h"  #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h"  namespace OpenGL { @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255  namespace { -constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; -constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = +constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =      NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; -constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = +constexpr size_t TOTAL_CONST_BUFFER_BYTES =      NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; + +constexpr size_t MAX_TEXTURES = 192; +constexpr size_t MAX_IMAGES = 48; + +struct TextureHandle { +    constexpr TextureHandle(u32 data, bool via_header_index) { +        const Tegra::Texture::TextureHandle handle{data}; +        image = handle.tic_id; +        sampler = via_header_index ? 
image : handle.tsc_id.Value(); +    } + +    u32 image; +    u32 sampler; +};  template <typename Engine, typename Entry> -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, -                                               ShaderType shader_type, std::size_t index = 0) { +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, +                             ShaderType shader_type, size_t index = 0) {      if constexpr (std::is_same_v<Entry, SamplerEntry>) {          if (entry.is_separated) {              const u32 buffer_1 = entry.buffer; @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry              const u32 offset_2 = entry.secondary_offset;              const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);              const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); -            return engine.GetTextureInfo(handle_1 | handle_2); +            return TextureHandle(handle_1 | handle_2, via_header_index);          }      }      if (entry.is_bindless) { -        const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); -        return engine.GetTextureInfo(handle); -    } - -    const auto& gpu_profile = engine.AccessGuestDriverProfile(); -    const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); -    if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { -        return engine.GetStageTexture(shader_type, offset); -    } else { -        return engine.GetTexture(offset); +        const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); +        return TextureHandle(raw, via_header_index);      } +    const u32 buffer = engine.GetBoundBuffer(); +    const u64 offset = (entry.offset + index) * sizeof(u32); +    return TextureHandle(engine.AccessConstBuffer32(shader_type, 
buffer, offset), via_header_index);  }  std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,      if (!entry.IsIndirect()) {          return entry.GetSize();      } -      if (buffer.size > Maxwell::MaxConstBufferSize) {          LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,                      Maxwell::MaxConstBufferSize); @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss                                      reinterpret_cast<const GLuint*>(ssbos));  } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { +    if (entry.is_buffer) { +        return ImageViewType::Buffer; +    } +    switch (entry.type) { +    case Tegra::Shader::TextureType::Texture1D: +        return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; +    case Tegra::Shader::TextureType::Texture2D: +        return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; +    case Tegra::Shader::TextureType::Texture3D: +        return ImageViewType::e3D; +    case Tegra::Shader::TextureType::TextureCube: +        return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; +    } +    UNREACHABLE(); +    return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { +    switch (entry.type) { +    case Tegra::Shader::ImageType::Texture1D: +        return ImageViewType::e1D; +    case Tegra::Shader::ImageType::Texture1DArray: +        return ImageViewType::e1DArray; +    case Tegra::Shader::ImageType::Texture2D: +        return ImageViewType::e2D; +    case Tegra::Shader::ImageType::Texture2DArray: +        return ImageViewType::e2DArray; +    case Tegra::Shader::ImageType::Texture3D: +        return ImageViewType::e3D; +    case Tegra::Shader::ImageType::TextureBuffer: +        return ImageViewType::Buffer; +    } +    UNREACHABLE(); +    return ImageViewType::e2D; +} +  } // Anonymous namespace  RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,                                     Core::Memory::Memory& cpu_memory_, const Device& device_,                                     ScreenInfo& screen_info_, ProgramManager& program_manager_,                                     StateTracker& state_tracker_) -    : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), +    : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),        kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),        screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), -      texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), +      stream_buffer(device, state_tracker), +      texture_cache_runtime(device, program_manager, state_tracker), +      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),        shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),        query_cache(*this, maxwell3d, gpu_memory), -      buffer_cache(*this, gpu_memory, 
cpu_memory_, device, STREAM_BUFFER_SIZE), +      buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),        fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),        async_shaders(emu_window_) { -    CheckExtensions(); -      unified_uniform_buffer.Create();      glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra                                   nullptr, 0);          }      } -      if (device.UseAsynchronousShaders()) {          async_shaders.AllocateWorkers();      } @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {      }  } -void RasterizerOpenGL::CheckExtensions() { -    if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { -        LOG_WARNING( -            Render_OpenGL, -            "Anisotropic filter is not supported! This can cause graphical issues in some games."); -    } -} -  void RasterizerOpenGL::SetupVertexFormat() {      auto& flags = maxwell3d.dirty.flags;      if (!flags[Dirty::VertexFormats]) { @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {      return info.offset;  } -void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { +void RasterizerOpenGL::SetupShaders() {      MICROPROFILE_SCOPE(OpenGL_Shader);      u32 clip_distances = 0; +    std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; +    image_view_indices.clear(); +    sampler_handles.clear(); + +    texture_cache.SynchronizeGraphicsDescriptors(); +      for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {          const auto& shader_config = maxwell3d.regs.shader_config[index];          const auto program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {              }              continue;          } -          // Currently 
this stages are not supported in the OpenGL backend.          // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL          if (program == Maxwell::ShaderProgram::TesselationControl || @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {          }          Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); -          const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;          switch (program) {          case Maxwell::ShaderProgram::VertexA: @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {          default:              UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,                                shader_config.enable.Value(), shader_config.offset); +            break;          }          // Stage indices are 0 - 5 -        const std::size_t stage = index == 0 ? 0 : index - 1; +        const size_t stage = index == 0 ? 0 : index - 1; +        shaders[stage] = shader; +          SetupDrawConstBuffers(stage, shader);          SetupDrawGlobalMemory(stage, shader); -        SetupDrawTextures(stage, shader); -        SetupDrawImages(stage, shader); +        SetupDrawTextures(shader, stage); +        SetupDrawImages(shader, stage);          // Workaround for Intel drivers.          
// When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {              ++index;          }      } -      SyncClipEnabled(clip_distances);      maxwell3d.dirty.flags[Dirty::Shaders] = false; + +    const std::span indices_span(image_view_indices.data(), image_view_indices.size()); +    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + +    size_t image_view_index = 0; +    size_t texture_index = 0; +    size_t image_index = 0; +    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { +        const Shader* const shader = shaders[stage]; +        if (shader) { +            const auto base = device.GetBaseBindings(stage); +            BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, +                         texture_index, image_index); +        } +    }  }  std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s      shader_cache.LoadDiskCache(title_id, stop_loading, callback);  } -void RasterizerOpenGL::ConfigureFramebuffers() { -    MICROPROFILE_SCOPE(OpenGL_Framebuffer); -    if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { -        return; -    } -    maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; - -    texture_cache.GuardRenderTargets(true); - -    View depth_surface = texture_cache.GetDepthBufferSurface(true); - -    const auto& regs = maxwell3d.regs; -    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); - -    // Bind the framebuffer surfaces -    FramebufferCacheKey key; -    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); -    for (std::size_t index = 0; index < colors_count; ++index) { -        View color_surface{texture_cache.GetColorBufferSurface(index, true)}; -        if (!color_surface) { -            
continue; -        } -        // Assume that a surface will be written to if it is used as a framebuffer, even -        // if the shader doesn't actually write to it. -        texture_cache.MarkColorBufferInUse(index); - -        key.SetAttachment(index, regs.rt_control.GetMap(index)); -        key.colors[index] = std::move(color_surface); -    } - -    if (depth_surface) { -        // Assume that a surface will be written to if it is used as a framebuffer, even if -        // the shader doesn't actually write to it. -        texture_cache.MarkDepthBufferInUse(); -        key.zeta = std::move(depth_surface); -    } - -    texture_cache.GuardRenderTargets(false); - -    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - -void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { -    const auto& regs = maxwell3d.regs; - -    texture_cache.GuardRenderTargets(true); -    View color_surface; - -    if (using_color) { -        // Determine if we have to preserve the contents. -        // First we have to make sure all clear masks are enabled. -        bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || -                                 !regs.clear_buffers.B || !regs.clear_buffers.A; -        const std::size_t index = regs.clear_buffers.RT; -        if (regs.clear_flags.scissor) { -            // Then we have to confirm scissor testing clears the whole image. 
-            const auto& scissor = regs.scissor_test[0]; -            preserve_contents |= scissor.min_x > 0; -            preserve_contents |= scissor.min_y > 0; -            preserve_contents |= scissor.max_x < regs.rt[index].width; -            preserve_contents |= scissor.max_y < regs.rt[index].height; -        } - -        color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); -        texture_cache.MarkColorBufferInUse(index); -    } - -    View depth_surface; -    if (using_depth_stencil) { -        bool preserve_contents = false; -        if (regs.clear_flags.scissor) { -            // For depth stencil clears we only have to confirm scissor test covers the whole image. -            const auto& scissor = regs.scissor_test[0]; -            preserve_contents |= scissor.min_x > 0; -            preserve_contents |= scissor.min_y > 0; -            preserve_contents |= scissor.max_x < regs.zeta_width; -            preserve_contents |= scissor.max_y < regs.zeta_height; -        } - -        depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); -        texture_cache.MarkDepthBufferInUse(); -    } -    texture_cache.GuardRenderTargets(false); - -    FramebufferCacheKey key; -    key.colors[0] = std::move(color_surface); -    key.zeta = std::move(depth_surface); - -    state_tracker.NotifyFramebuffer(); -    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} -  void RasterizerOpenGL::Clear() {      if (!maxwell3d.ShouldExecute()) {          return; @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {          regs.clear_buffers.A) {          use_color = true; -        state_tracker.NotifyColorMask0(); -        glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, +        const GLuint index = regs.clear_buffers.RT; +        state_tracker.NotifyColorMask(index); +        glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,                       
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);          // TODO(Rodrigo): Determine if clamping is used on clears @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {          state_tracker.NotifyScissor0();          glDisablei(GL_SCISSOR_TEST, 0);      } -      UNIMPLEMENTED_IF(regs.clear_flags.viewport); -    ConfigureClearFramebuffer(use_color, use_depth || use_stencil); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.UpdateRenderTargets(true); +        state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); +    }      if (use_color) { -        glClearBufferfv(GL_COLOR, 0, regs.clear_color); +        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);      } -      if (use_depth && use_stencil) {          glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);      } else if (use_depth) { @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {                     (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());      // Prepare the vertex array. -    const bool invalidated = buffer_cache.Map(buffer_size); - -    if (invalidated) { -        // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty -        auto& dirty = maxwell3d.dirty.flags; -        dirty[Dirty::VertexBuffers] = true; -        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { -            dirty[index] = true; -        } -    } +    buffer_cache.Map(buffer_size);      // Prepare vertex array format.      SetupVertexFormat(); @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {      }      // Setup shaders and their used resources. 
-    texture_cache.GuardSamplers(true); -    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); -    SetupShaders(primitive_mode); -    texture_cache.GuardSamplers(false); - -    ConfigureFramebuffers(); +    auto lock = texture_cache.AcquireLock(); +    SetupShaders();      // Signal the buffer cache that we are not going to upload more things.      buffer_cache.Unmap(); - +    texture_cache.UpdateRenderTargets(false); +    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());      program_manager.BindGraphicsPipeline(); -    if (texture_cache.TextureBarrier()) { -        glTextureBarrier(); -    } - +    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);      BeginTransformFeedback(primitive_mode);      const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {      buffer_cache.Acquire();      current_cbuf = 0; -    auto kernel = shader_cache.GetComputeKernel(code_addr); -    program_manager.BindCompute(kernel->GetHandle()); +    Shader* const kernel = shader_cache.GetComputeKernel(code_addr); -    SetupComputeTextures(kernel); -    SetupComputeImages(kernel); +    auto lock = texture_cache.AcquireLock(); +    BindComputeTextures(kernel); -    const std::size_t buffer_size = -        Tegra::Engines::KeplerCompute::NumConstBuffers * -        (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); +    const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * +                               (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());      buffer_cache.Map(buffer_size);      SetupComputeConstBuffers(kernel); @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {      buffer_cache.Unmap();      const auto& launch_desc = kepler_compute.launch_description; -    
program_manager.BindCompute(kernel->GetHandle());      glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);      ++num_queued_commands;  } @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {      if (addr == 0 || size == 0) {          return;      } -    texture_cache.FlushRegion(addr, size); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.DownloadMemory(addr, size); +    }      buffer_cache.FlushRegion(addr, size);      query_cache.FlushRegion(addr, size);  } @@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {      if (!Settings::IsGPULevelHigh()) {          return buffer_cache.MustFlushRegion(addr, size);      } -    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); +    return texture_cache.IsRegionGpuModified(addr, size) || +           buffer_cache.MustFlushRegion(addr, size);  }  void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {      if (addr == 0 || size == 0) {          return;      } -    texture_cache.InvalidateRegion(addr, size); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.WriteMemory(addr, size); +    }      shader_cache.InvalidateRegion(addr, size);      buffer_cache.InvalidateRegion(addr, size);      query_cache.InvalidateRegion(addr, size); @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {      if (addr == 0 || size == 0) {          return;      } -    texture_cache.OnCPUWrite(addr, size); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.WriteMemory(addr, size); +    }      shader_cache.OnCPUWrite(addr, size);      buffer_cache.OnCPUWrite(addr, size);  }  void RasterizerOpenGL::SyncGuestHost() {      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    texture_cache.SyncGuestHost();      
buffer_cache.SyncGuestHost();      shader_cache.SyncGuestHost();  } +void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.UnmapMemory(addr, size); +    } +    buffer_cache.OnCPUWrite(addr, size); +    shader_cache.OnCPUWrite(addr, size); +} +  void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {      if (!gpu.IsAsync()) {          gpu_memory.Write<u32>(addr, value); @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {                      GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);  } +void RasterizerOpenGL::FragmentBarrier() { +    glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); +} + +void RasterizerOpenGL::TiledCacheBarrier() { +    glTextureBarrier(); +} +  void RasterizerOpenGL::FlushCommands() {      // Only flush when we have commands queued to OpenGL.      if (num_queued_commands == 0) { @@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() {      // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.      
num_queued_commands = 0; +    fence_manager.TickFrame();      buffer_cache.TickFrame(); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.TickFrame(); +    }  } -bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, -                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, +                                             const Tegra::Engines::Fermi2D::Surface& dst,                                               const Tegra::Engines::Fermi2D::Config& copy_config) {      MICROPROFILE_SCOPE(OpenGL_Blits); -    texture_cache.DoFermiCopy(src, dst, copy_config); +    auto lock = texture_cache.AcquireLock(); +    texture_cache.BlitImage(dst, src, copy_config);      return true;  }  bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,                                           VAddr framebuffer_addr, u32 pixel_stride) { -    if (!framebuffer_addr) { -        return {}; +    if (framebuffer_addr == 0) { +        return false;      } -      MICROPROFILE_SCOPE(OpenGL_CacheManagement); -    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; -    if (!surface) { -        return {}; +    auto lock = texture_cache.AcquireLock(); +    ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; +    if (!image_view) { +        return false;      } -      // Verify that the cached surface is the same size and format as the requested framebuffer -    const auto& params{surface->GetSurfaceParams()}; -    const auto& pixel_format{ -        VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; -    ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); -    ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); +    // 
ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); +    // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); -    if (params.pixel_format != pixel_format) { -        LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); -    } +    screen_info.display_texture = image_view->Handle(ImageViewType::e2D); +    screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); +    return true; +} -    screen_info.display_texture = surface->GetTexture(); -    screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; +void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { +    image_view_indices.clear(); +    sampler_handles.clear(); -    return true; +    texture_cache.SynchronizeComputeDescriptors(); + +    SetupComputeTextures(kernel); +    SetupComputeImages(kernel); + +    const std::span indices_span(image_view_indices.data(), image_view_indices.size()); +    texture_cache.FillComputeImageViews(indices_span, image_view_ids); + +    program_manager.BindCompute(kernel->GetHandle()); +    size_t image_view_index = 0; +    size_t texture_index = 0; +    size_t image_index = 0; +    BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); +} + +void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, +                                    GLuint base_image, size_t& image_view_index, +                                    size_t& texture_index, size_t& image_index) { +    const GLuint* const samplers = sampler_handles.data() + texture_index; +    const GLuint* const textures = texture_handles.data() + texture_index; +    const GLuint* const images = image_handles.data() + image_index; + +    const size_t num_samplers = entries.samplers.size(); +    for (const auto& sampler : entries.samplers) { +        for (size_t i = 0; i < sampler.size; ++i) { +            const ImageViewId image_view_id = 
image_view_ids[image_view_index++]; +            const ImageView& image_view = texture_cache.GetImageView(image_view_id); +            const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); +            texture_handles[texture_index++] = handle; +        } +    } +    const size_t num_images = entries.images.size(); +    for (size_t unit = 0; unit < num_images; ++unit) { +        // TODO: Mark as modified +        const ImageViewId image_view_id = image_view_ids[image_view_index++]; +        const ImageView& image_view = texture_cache.GetImageView(image_view_id); +        const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); +        image_handles[image_index] = handle; +        ++image_index; +    } +    if (num_samplers > 0) { +        glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); +        glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); +    } +    if (num_images > 0) { +        glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); +    }  }  void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh          GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,          GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,      }; -      const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};      const auto& entries{shader->GetEntries().global_memory_entries}; @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e      }  } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { -    MICROPROFILE_SCOPE(OpenGL_Texture); -    u32 binding = device.GetBaseBindings(stage_index).sampler; +void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { +    const bool 
via_header_index = +        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;      for (const auto& entry : shader->GetEntries().samplers) {          const auto shader_type = static_cast<ShaderType>(stage_index); -        for (std::size_t i = 0; i < entry.size; ++i) { -            const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); -            SetupTexture(binding++, texture, entry); +        for (size_t index = 0; index < entry.size; ++index) { +            const auto handle = +                GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); +            const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); +            sampler_handles.push_back(sampler->Handle()); +            image_view_indices.push_back(handle.image);          }      }  } -void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { -    MICROPROFILE_SCOPE(OpenGL_Texture); -    u32 binding = 0; +void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { +    const bool via_header_index = kepler_compute.launch_description.linked_tsc;      for (const auto& entry : kernel->GetEntries().samplers) { -        for (std::size_t i = 0; i < entry.size; ++i) { -            const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); -            SetupTexture(binding++, texture, entry); +        for (size_t i = 0; i < entry.size; ++i) { +            const auto handle = +                GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); +            const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); +            sampler_handles.push_back(sampler->Handle()); +            image_view_indices.push_back(handle.image);          }      }  } -void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, -                                    const SamplerEntry& entry) { -    const auto view = 
texture_cache.GetTextureSurface(texture.tic, entry); -    if (!view) { -        // Can occur when texture addr is null or its memory is unmapped/invalid -        glBindSampler(binding, 0); -        glBindTextureUnit(binding, 0); -        return; -    } -    const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, -                                           texture.tic.z_source, texture.tic.w_source); -    glBindTextureUnit(binding, handle); -    if (!view->GetSurfaceParams().IsBuffer()) { -        glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); -    } -} - -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { -    u32 binding = device.GetBaseBindings(stage_index).image; +void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { +    const bool via_header_index = +        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;      for (const auto& entry : shader->GetEntries().images) {          const auto shader_type = static_cast<ShaderType>(stage_index); -        const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; -        SetupImage(binding++, tic, entry); +        const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); +        image_view_indices.push_back(handle.image);      }  } -void RasterizerOpenGL::SetupComputeImages(Shader* shader) { -    u32 binding = 0; +void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { +    const bool via_header_index = kepler_compute.launch_description.linked_tsc;      for (const auto& entry : shader->GetEntries().images) { -        const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; -        SetupImage(binding++, tic, entry); +        const auto handle = +            GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); +        image_view_indices.push_back(handle.image);      }  } -void 
RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, -                                  const ImageEntry& entry) { -    const auto view = texture_cache.GetImageSurface(tic, entry); -    if (!view) { -        glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); -        return; -    } -    if (entry.is_written) { -        view->MarkAsModified(texture_cache.Tick()); -    } -    const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); -    glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); -} -  void RasterizerOpenGL::SyncViewport() {      auto& flags = maxwell3d.dirty.flags;      const auto& regs = maxwell3d.regs; @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {      flags[Dirty::PointSize] = false;      oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); +    oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); -    if (maxwell3d.regs.vp_point_size.enable) { -        // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. -        glEnable(GL_PROGRAM_POINT_SIZE); -        return; -    } - -    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid -    // in OpenGL).      
glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); -    glDisable(GL_PROGRAM_POINT_SIZE);  }  void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index de28cff15..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,12 +7,13 @@  #include <array>  #include <atomic>  #include <cstddef> -#include <map>  #include <memory>  #include <optional>  #include <tuple>  #include <utility> +#include <boost/container/static_vector.hpp> +  #include <glad/glad.h>  #include "common/common_types.h" @@ -23,16 +24,14 @@  #include "video_core/renderer_opengl/gl_buffer_cache.h"  #include "video_core/renderer_opengl/gl_device.h"  #include "video_core/renderer_opengl/gl_fence_manager.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h"  #include "video_core/renderer_opengl/gl_query_cache.h"  #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h"  #include "video_core/renderer_opengl/gl_shader_cache.h"  #include "video_core/renderer_opengl/gl_shader_decompiler.h"  #include "video_core/renderer_opengl/gl_shader_manager.h"  #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h"  #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h"  #include "video_core/shader/async_shaders.h"  #include "video_core/textures/texture.h" @@ -51,7 +50,7 @@ class MemoryManager;  namespace OpenGL {  struct ScreenInfo; -struct DrawParameters; +struct ShaderEntries;  struct BindlessSSBO {      GLuint64EXT address; @@ -79,15 +78,18 @@ public:      void InvalidateRegion(VAddr addr, u64 size) override;      void OnCPUWrite(VAddr addr, u64 size) override;      void SyncGuestHost() override; +    void UnmapMemory(VAddr addr, u64 size) override;      void 
SignalSemaphore(GPUVAddr addr, u32 value) override;      void SignalSyncPoint(u32 value) override;      void ReleaseFences() override;      void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void WaitForIdle() override; +    void FragmentBarrier() override; +    void TiledCacheBarrier() override;      void FlushCommands() override;      void TickFrame() override; -    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, -                               const Tegra::Engines::Fermi2D::Regs::Surface& dst, +    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, +                               const Tegra::Engines::Fermi2D::Surface& dst,                                 const Tegra::Engines::Fermi2D::Config& copy_config) override;      bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,                             u32 pixel_stride) override; @@ -108,11 +110,14 @@ public:      }  private: -    /// Configures the color and depth framebuffer states. -    void ConfigureFramebuffers(); +    static constexpr size_t MAX_TEXTURES = 192; +    static constexpr size_t MAX_IMAGES = 48; +    static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + +    void BindComputeTextures(Shader* kernel); -    /// Configures the color and depth framebuffer for clearing. -    void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); +    void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, +                      size_t& image_view_index, size_t& texture_index, size_t& image_index);      /// Configures the current constbuffers to use for the draw command.      void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); @@ -136,23 +141,16 @@ private:                             size_t size, BindlessSSBO* ssbo);      /// Configures the current textures to use for the draw command. 
-    void SetupDrawTextures(std::size_t stage_index, Shader* shader); +    void SetupDrawTextures(const Shader* shader, size_t stage_index);      /// Configures the textures used in a compute shader. -    void SetupComputeTextures(Shader* kernel); - -    /// Configures a texture. -    void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, -                      const SamplerEntry& entry); +    void SetupComputeTextures(const Shader* kernel);      /// Configures images in a graphics shader. -    void SetupDrawImages(std::size_t stage_index, Shader* shader); +    void SetupDrawImages(const Shader* shader, size_t stage_index);      /// Configures images in a compute shader. -    void SetupComputeImages(Shader* shader); - -    /// Configures an image. -    void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); +    void SetupComputeImages(const Shader* shader);      /// Syncs the viewport and depth range to match the guest state      void SyncViewport(); @@ -227,9 +225,6 @@ private:      /// End a transform feedback      void EndTransformFeedback(); -    /// Check for extension that are not strictly required but are needed for correct emulation -    void CheckExtensions(); -      std::size_t CalculateVertexArraysSize() const;      std::size_t CalculateIndexBufferSize() const; @@ -242,7 +237,7 @@ private:      GLintptr SetupIndexBuffer(); -    void SetupShaders(GLenum primitive_mode); +    void SetupShaders();      Tegra::GPU& gpu;      Tegra::Engines::Maxwell3D& maxwell3d; @@ -254,19 +249,21 @@ private:      ProgramManager& program_manager;      StateTracker& state_tracker; -    TextureCacheOpenGL texture_cache; +    OGLStreamBuffer stream_buffer; +    TextureCacheRuntime texture_cache_runtime; +    TextureCache texture_cache;      ShaderCacheOpenGL shader_cache; -    SamplerCacheOpenGL sampler_cache; -    FramebufferCacheOpenGL framebuffer_cache;      QueryCache query_cache;      OGLBufferCache buffer_cache; 
     FenceManagerOpenGL fence_manager;      VideoCommon::Shader::AsyncShaders async_shaders; -    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - -    GLint vertex_binding = 0; +    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; +    std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; +    boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; +    std::array<GLuint, MAX_TEXTURES> texture_handles; +    std::array<GLuint, MAX_IMAGES> image_handles;      std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>          transform_feedback_buffers; @@ -280,7 +277,7 @@ private:      std::size_t current_cbuf = 0;      OGLBuffer unified_uniform_buffer; -    /// Number of commands queued to the OpenGL driver. Reseted on flush. +    /// Number of commands queued to the OpenGL driver. Resetted on flush.      std::size_t num_queued_commands = 0;      u32 last_clip_distance_mask = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -71,7 +71,7 @@ void OGLSampler::Create() {          return;      MICROPROFILE_SCOPE(OpenGL_ResourceCreation); -    glGenSamplers(1, &handle); +    glCreateSamplers(1, &handle);  }  void OGLSampler::Release() { diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/logging/log.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - -namespace OpenGL { - -SamplerCacheOpenGL::SamplerCacheOpenGL() = default; - -SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; - -OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { -    OGLSampler sampler; -    sampler.Create(); - -    const GLuint sampler_id{sampler.handle}; -    glSamplerParameteri( -        sampler_id, GL_TEXTURE_MAG_FILTER, -        MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); -    glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, -                        MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); -    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); -    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); -    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); -    glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, -                        tsc.depth_compare_enabled == 1 ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE); -    glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, -                        MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); -    glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); -    glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); -    glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); -    glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); -    if (GLAD_GL_ARB_texture_filter_anisotropic) { -        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); -    } else if (GLAD_GL_EXT_texture_filter_anisotropic) { -        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); -    } else { -        LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); -    } - -    return sampler; -} - -GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { -    return sampler.handle; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <glad/glad.h> - -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/sampler_cache.h" - -namespace OpenGL { - -class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { -public: -    explicit SamplerCacheOpenGL(); -    ~SamplerCacheOpenGL(); - -protected: -    OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - -    GLuint ToSamplerType(const OGLSampler& sampler) const override; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eabfdea5d..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -27,7 +27,6 @@  #include "video_core/renderer_opengl/gl_shader_decompiler.h"  #include "video_core/renderer_opengl/gl_shader_disk_cache.h"  #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h"  #include "video_core/shader/memory_util.h"  #include "video_core/shader/registry.h"  #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ccbdfe967..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;  using Tegra::Shader::PixelImap;  using Tegra::Shader::Register;  using Tegra::Shader::TextureType; -using VideoCommon::Shader::BuildTransformFeedback; -using VideoCommon::Shader::Registry; -using namespace std::string_literals;  using namespace VideoCommon::Shader; +using namespace std::string_literals;  using Maxwell = Tegra::Engines::Maxwell3D::Regs;  using Operation = const OperationNode&; @@ -2753,11 +2751,11 @@ private:          }      } -    std::string GetSampler(const Sampler& 
sampler) const { +    std::string GetSampler(const SamplerEntry& sampler) const {          return AppendSuffix(sampler.index, "sampler");      } -    std::string GetImage(const Image& image) const { +    std::string GetImage(const ImageEntry& image) const {          return AppendSuffix(image.index, "image");      } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c4ff47875..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,8 +20,8 @@ namespace OpenGL {  class Device;  using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::Sampler; -using ImageEntry = VideoCommon::Shader::Image; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry;  class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {  public: diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {      }  } +void ProgramManager::BindHostCompute(GLuint program) { +    if (use_assembly_programs) { +        glDisable(GL_COMPUTE_PROGRAM_NV); +    } +    glUseProgram(program); +    is_graphics_bound = false; +} + +void ProgramManager::RestoreGuestCompute() { +    if (use_assembly_programs) { +        glEnable(GL_COMPUTE_PROGRAM_NV); +        glUseProgram(0); +    } +} +  void ProgramManager::UseVertexShader(GLuint program) {      if (use_assembly_programs) {          BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 
950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,6 +45,12 @@ public:      /// Rewinds BindHostPipeline state changes.      void RestoreGuestPipeline(); +    /// Binds an OpenGL GLSL program object unsynchronized with the guest state. +    void BindHostCompute(GLuint program); + +    /// Rewinds BindHostCompute state changes. +    void RestoreGuestCompute(); +      void UseVertexShader(GLuint program);      void UseGeometryShader(GLuint program);      void UseFragmentShader(GLuint program); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 45f4fc565..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}      }  } +void StateTracker::InvalidateStreamBuffer() { +    flags[Dirty::VertexBuffers] = true; +    for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { +        flags[index] = true; +    } +} +  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -92,6 +92,8 @@ class StateTracker {  public:      explicit StateTracker(Tegra::GPU& gpu); +    void InvalidateStreamBuffer(); +      void BindIndexBuffer(GLuint new_index_buffer) {          if (index_buffer == new_index_buffer) {              return; @@ -100,6 +102,14 @@ public:          glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);      } +    void BindFramebuffer(GLuint new_framebuffer) { +        if (framebuffer == new_framebuffer) { +            return; +        } +        framebuffer = 
new_framebuffer; +        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); +    } +      void NotifyScreenDrawVertexArray() {          flags[OpenGL::Dirty::VertexFormats] = true;          flags[OpenGL::Dirty::VertexFormat0 + 0] = true; @@ -129,9 +139,9 @@ public:          flags[OpenGL::Dirty::Scissor0] = true;      } -    void NotifyColorMask0() { +    void NotifyColorMask(size_t index) {          flags[OpenGL::Dirty::ColorMasks] = true; -        flags[OpenGL::Dirty::ColorMask0] = true; +        flags[OpenGL::Dirty::ColorMask0 + index] = true;      }      void NotifyBlend0() { @@ -190,6 +200,7 @@ public:  private:      Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; +    GLuint framebuffer = 0;      GLuint index_buffer = 0;  }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,6 +9,7 @@  #include "common/assert.h"  #include "common/microprofile.h"  #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_state_tracker.h"  #include "video_core/renderer_opengl/gl_stream_buffer.h"  MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",  namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) -    : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) +    : state_tracker{state_tracker_} {      gl_buffer.Create(); -    GLsizeiptr allocate_size = size; -    if (vertex_data_usage) { -        // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer -        // read position is near the end and is an out-of-bound access to the vertex buffer. 
This is -        // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the -        // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the -        // crash. -        allocate_size *= 2; -    } -      static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; -    glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); +    glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);      mapped_ptr = static_cast<u8*>( -        glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); +        glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));      if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {          glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {      gl_buffer.Release();  } -std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { -    ASSERT(size <= buffer_size); -    ASSERT(alignment <= buffer_size); +std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { +    ASSERT(size <= BUFFER_SIZE); +    ASSERT(alignment <= BUFFER_SIZE);      mapped_size = size;      if (alignment > 0) {          buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);      } -    bool invalidate = false; -    if (buffer_pos + size > buffer_size) { +    if (buffer_pos + size > BUFFER_SIZE) {          MICROPROFILE_SCOPE(OpenGL_StreamBuffer);          glInvalidateBufferData(gl_buffer.handle); +        state_tracker.InvalidateStreamBuffer();          buffer_pos = 0; -        invalidate = true;      } -    return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); +    return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);  }  void OGLStreamBuffer::Unmap(GLsizeiptr size) { diff --git 
a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -4,29 +4,31 @@  #pragma once -#include <tuple> +#include <utility> +  #include <glad/glad.h> +  #include "common/common_types.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  namespace OpenGL {  class Device; +class StateTracker;  class OGLStreamBuffer : private NonCopyable {  public: -    explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); +    explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);      ~OGLStreamBuffer();      /*       * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes       * and the optional alignment requirement.       * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. -     * The return values are the pointer to the new chunk, the offset within the buffer, -     * and the invalidation flag for previous chunks. +     * The return values are the pointer to the new chunk, and the offset within the buffer.       * The actual used size must be specified on unmapping the chunk.       
*/ -    std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); +    std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);      void Unmap(GLsizeiptr size); @@ -39,15 +41,18 @@ public:      }      GLsizeiptr Size() const noexcept { -        return buffer_size; +        return BUFFER_SIZE;      }  private: +    static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; + +    StateTracker& state_tracker; +      OGLBuffer gl_buffer;      GLuint64EXT gpu_address = 0;      GLintptr buffer_pos = 0; -    GLsizeiptr buffer_size = 0;      GLsizeiptr mapped_size = 0;      u8* mapped_ptr = nullptr;  }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index daf352b50..4c690418c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -2,173 +2,238 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
-#include "common/assert.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include <algorithm> +#include <array> +#include <bit> +#include <string> + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h"  #include "video_core/renderer_opengl/gl_state_tracker.h"  #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/surface_base.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/samples_helper.h"  #include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/convert.h" -#include "video_core/textures/texture.h" +#include "video_core/textures/decoders.h"  namespace OpenGL { -using Tegra::Texture::SwizzleSource; -using VideoCore::MortonSwizzleMode; +namespace { +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureMipmapFilter; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCommon::CalculateLevelStrideAlignment; +using VideoCommon::ImageCopy; +using VideoCommon::ImageFlagBits; +using VideoCommon::ImageType; +using VideoCommon::NUM_RT; +using VideoCommon::SamplesLog2; +using VideoCommon::SwizzleParameters; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatSRGB; +using VideoCore::Surface::MaxPixelFormat;  using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget;  using VideoCore::Surface::SurfaceType; 
-MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", -                    MP_RGB(128, 192, 128)); +struct CopyOrigin { +    GLint level; +    GLint x; +    GLint y; +    GLint z; +}; -namespace { +struct CopyRegion { +    GLsizei width; +    GLsizei height; +    GLsizei depth; +};  struct FormatTuple {      GLenum internal_format;      GLenum format = GL_NONE;      GLenum type = GL_NONE; +    GLenum store_format = internal_format;  }; -constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ -    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // A8B8G8R8_UNORM -    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                               // A8B8G8R8_SNORM -    {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                            // A8B8G8R8_SINT -    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                  // A8B8G8R8_UINT -    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                     // R5G6B5_UNORM -    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                 // B5G6R5_UNORM -    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM -    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM -    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT -    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM -    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM -    {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM -    {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT -    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                      // R8_UINT -    {GL_RGBA16F, 
GL_RGBA, GL_HALF_FLOAT},                             // R16G16B16A16_FLOAT -    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                          // R16G16B16A16_UNORM -    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                             // R16G16B16A16_SNORM -    {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                          // R16G16B16A16_SINT -    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                // R16G16B16A16_UINT -    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},     // B10G11R11_FLOAT -    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                  // R32G32B32A32_UINT -    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                               // BC1_RGBA_UNORM -    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                               // BC2_UNORM -    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                               // BC3_UNORM -    {GL_COMPRESSED_RED_RGTC1},                                        // BC4_UNORM -    {GL_COMPRESSED_SIGNED_RED_RGTC1},                                 // BC4_SNORM -    {GL_COMPRESSED_RG_RGTC2},                                         // BC5_UNORM -    {GL_COMPRESSED_SIGNED_RG_RGTC2},                                  // BC5_SNORM -    {GL_COMPRESSED_RGBA_BPTC_UNORM},                                  // BC7_UNORM -    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT -    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT -    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM -    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM -    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT -    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT -    {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT -    {GL_RG32I, GL_RG_INTEGER, GL_INT},                                
// R32G32_SINT -    {GL_R32F, GL_RED, GL_FLOAT},                                      // R32_FLOAT -    {GL_R16F, GL_RED, GL_HALF_FLOAT},                                 // R16_FLOAT -    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                              // R16_UNORM -    {GL_R16_SNORM, GL_RED, GL_SHORT},                                 // R16_SNORM -    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                    // R16_UINT -    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                              // R16_SINT -    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                              // R16G16_UNORM -    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                 // R16G16_FLOAT -    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                    // R16G16_UINT -    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                              // R16G16_SINT -    {GL_RG16_SNORM, GL_RG, GL_SHORT},                                 // R16G16_SNORM -    {GL_RGB32F, GL_RGB, GL_FLOAT},                                    // R32G32B32_FLOAT -    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},          // A8B8G8R8_SRGB -    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                // R8G8_UNORM -    {GL_RG8_SNORM, GL_RG, GL_BYTE},                                   // R8G8_SNORM -    {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                // R8G8_SINT -    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                      // R8G8_UINT -    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                      // R32G32_UINT -    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16X16_FLOAT -    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                      // R32_UINT -    {GL_R32I, GL_RED_INTEGER, GL_INT},                                // R32_SINT -    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM -    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM -    
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM -    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_UNORM -    // Compressed sRGB formats -    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},           // BC1_RGBA_SRGB -    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},           // BC2_SRGB -    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},           // BC3_SRGB -    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},              // BC7_SRGB -    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},          // ASTC_2D_4X4_SRGB -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},          // ASTC_2D_8X8_SRGB -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},          // ASTC_2D_8X5_SRGB -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},          // ASTC_2D_5X4_SRGB -    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                  // ASTC_2D_5X5_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},          // ASTC_2D_5X5_SRGB -    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                 // ASTC_2D_10X8_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},         // ASTC_2D_10X8_SRGB -    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                  // ASTC_2D_6X6_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},          // ASTC_2D_6X6_SRGB -    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                // ASTC_2D_10X10_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},        // ASTC_2D_10X10_SRGB -    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                // ASTC_2D_12X12_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},        // ASTC_2D_12X12_SRGB -    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                  // ASTC_2D_8X6_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},          // ASTC_2D_8X6_SRGB -    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                  // ASTC_2D_6X5_UNORM -    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},          // ASTC_2D_6X5_SRGB -    {GL_RGB9_E5, GL_RGB, 
GL_UNSIGNED_INT_5_9_9_9_REV},  // E5B9G9R9_FLOAT - -    // Depth formats -    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},         // D32_FLOAT -    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - -    // DepthStencil formats -    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT -    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM +constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ +    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                  // A8B8G8R8_UNORM +    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                                // A8B8G8R8_SNORM +    {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                             // A8B8G8R8_SINT +    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                   // A8B8G8R8_UINT +    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                      // R5G6B5_UNORM +    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                  // B5G6R5_UNORM +    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},              // A1R5G5B5_UNORM +    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},            // A2B10G10R10_UNORM +    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV},  // A2B10G10R10_UINT +    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},              // A1B5G5R5_UNORM +    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                 // R8_UNORM +    {GL_R8_SNORM, GL_RED, GL_BYTE},                                    // R8_SNORM +    {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                 // R8_SINT +    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                       // R8_UINT +    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16A16_FLOAT +    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                           // R16G16B16A16_UNORM +    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                        
      // R16G16B16A16_SNORM +    {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                           // R16G16B16A16_SINT +    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                 // R16G16B16A16_UINT +    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},      // B10G11R11_FLOAT +    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                   // R32G32B32A32_UINT +    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                                // BC1_RGBA_UNORM +    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                                // BC2_UNORM +    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                                // BC3_UNORM +    {GL_COMPRESSED_RED_RGTC1},                                         // BC4_UNORM +    {GL_COMPRESSED_SIGNED_RED_RGTC1},                                  // BC4_SNORM +    {GL_COMPRESSED_RG_RGTC2},                                          // BC5_UNORM +    {GL_COMPRESSED_SIGNED_RG_RGTC2},                                   // BC5_SNORM +    {GL_COMPRESSED_RGBA_BPTC_UNORM},                                   // BC7_UNORM +    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                           // BC6H_UFLOAT +    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                             // BC6H_SFLOAT +    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                 // ASTC_2D_4X4_UNORM +    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                             // B8G8R8A8_UNORM +    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                   // R32G32B32A32_FLOAT +    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                             // R32G32B32A32_SINT +    {GL_RG32F, GL_RG, GL_FLOAT},                                       // R32G32_FLOAT +    {GL_RG32I, GL_RG_INTEGER, GL_INT},                                 // R32G32_SINT +    {GL_R32F, GL_RED, GL_FLOAT},                                       // R32_FLOAT +    {GL_R16F, GL_RED, GL_HALF_FLOAT},                                  // R16_FLOAT +    {GL_R16, GL_RED, 
GL_UNSIGNED_SHORT},                               // R16_UNORM +    {GL_R16_SNORM, GL_RED, GL_SHORT},                                  // R16_SNORM +    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                     // R16_UINT +    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                               // R16_SINT +    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                               // R16G16_UNORM +    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                  // R16G16_FLOAT +    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                     // R16G16_UINT +    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                               // R16G16_SINT +    {GL_RG16_SNORM, GL_RG, GL_SHORT},                                  // R16G16_SNORM +    {GL_RGB32F, GL_RGB, GL_FLOAT},                                     // R32G32B32_FLOAT +    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB +    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                 // R8G8_UNORM +    {GL_RG8_SNORM, GL_RG, GL_BYTE},                                    // R8G8_SNORM +    {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                 // R8G8_SINT +    {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                       // R8G8_UINT +    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                       // R32G32_UINT +    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                               // R16G16B16X16_FLOAT +    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                       // R32_UINT +    {GL_R32I, GL_RED_INTEGER, GL_INT},                                 // R32_SINT +    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                 // ASTC_2D_8X8_UNORM +    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                 // ASTC_2D_8X5_UNORM +    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                 // ASTC_2D_5X4_UNORM +    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8},            // B8G8R8A8_UNORM +    
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                          // BC1_RGBA_SRGB +    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                          // BC2_SRGB +    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                          // BC3_SRGB +    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                             // BC7_SRGB +    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},                // A4B4G4R4_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                         // ASTC_2D_4X4_SRGB +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                         // ASTC_2D_8X8_SRGB +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                         // ASTC_2D_8X5_SRGB +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                         // ASTC_2D_5X4_SRGB +    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                                 // ASTC_2D_5X5_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                         // ASTC_2D_5X5_SRGB +    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                                // ASTC_2D_10X8_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                        // ASTC_2D_10X8_SRGB +    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                                 // ASTC_2D_6X6_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                         // ASTC_2D_6X6_SRGB +    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                               // ASTC_2D_10X10_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                       // ASTC_2D_10X10_SRGB +    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                               // ASTC_2D_12X12_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                       // ASTC_2D_12X12_SRGB +    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                                 // ASTC_2D_8X6_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                         // ASTC_2D_8X6_SRGB +    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                                 // 
ASTC_2D_6X5_UNORM +    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                         // ASTC_2D_6X5_SRGB +    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                 // E5B9G9R9_FLOAT +    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},             // D32_FLOAT +    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},     // D16_UNORM +    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},     // D24_UNORM_S8_UINT +    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},     // S8_UINT_D24_UNORM      {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,       GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT  }}; +constexpr std::array ACCELERATED_FORMATS{ +    GL_RGBA32F,   GL_RGBA16F,   GL_RG32F,    GL_RG16F,        GL_R11F_G11F_B10F, GL_R32F, +    GL_R16F,      GL_RGBA32UI,  GL_RGBA16UI, GL_RGB10_A2UI,   GL_RGBA8UI,        GL_RG32UI, +    GL_RG16UI,    GL_RG8UI,     GL_R32UI,    GL_R16UI,        GL_R8UI,           GL_RGBA32I, +    GL_RGBA16I,   GL_RGBA8I,    GL_RG32I,    GL_RG16I,        GL_RG8I,           GL_R32I, +    GL_R16I,      GL_R8I,       GL_RGBA16,   GL_RGB10_A2,     GL_RGBA8,          GL_RG16, +    GL_RG8,       GL_R16,       GL_R8,       GL_RGBA16_SNORM, GL_RGBA8_SNORM,    GL_RG16_SNORM, +    GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, +}; +  const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { -    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); -    return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; +    ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); +    return FORMAT_TABLE[static_cast<size_t>(pixel_format)];  } -GLenum GetTextureTarget(const SurfaceTarget& target) { -    switch (target) { -    case SurfaceTarget::TextureBuffer: +GLenum ImageTarget(const VideoCommon::ImageInfo& info) { +    switch (info.type) { +    case ImageType::e1D: +        return GL_TEXTURE_1D_ARRAY; +    case ImageType::e2D: +        if (info.num_samples > 1) { 
+            return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; +        } +        return GL_TEXTURE_2D_ARRAY; +    case ImageType::e3D: +        return GL_TEXTURE_3D; +    case ImageType::Linear: +        return GL_TEXTURE_2D_ARRAY; +    case ImageType::Buffer:          return GL_TEXTURE_BUFFER; -    case SurfaceTarget::Texture1D: +    } +    UNREACHABLE_MSG("Invalid image type={}", info.type); +    return GL_NONE; +} + +GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +    const bool is_multisampled = num_samples > 1; +    switch (type) { +    case ImageViewType::e1D:          return GL_TEXTURE_1D; -    case SurfaceTarget::Texture2D: -        return GL_TEXTURE_2D; -    case SurfaceTarget::Texture3D: +    case ImageViewType::e2D: +        return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; +    case ImageViewType::Cube: +        return GL_TEXTURE_CUBE_MAP; +    case ImageViewType::e3D:          return GL_TEXTURE_3D; -    case SurfaceTarget::Texture1DArray: +    case ImageViewType::e1DArray:          return GL_TEXTURE_1D_ARRAY; -    case SurfaceTarget::Texture2DArray: -        return GL_TEXTURE_2D_ARRAY; -    case SurfaceTarget::TextureCubemap: -        return GL_TEXTURE_CUBE_MAP; -    case SurfaceTarget::TextureCubeArray: +    case ImageViewType::e2DArray: +        return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; +    case ImageViewType::CubeArray:          return GL_TEXTURE_CUBE_MAP_ARRAY; +    case ImageViewType::Rect: +        return GL_TEXTURE_RECTANGLE; +    case ImageViewType::Buffer: +        return GL_TEXTURE_BUFFER;      } -    UNREACHABLE(); -    return {}; +    UNREACHABLE_MSG("Invalid image view type={}", type); +    return GL_NONE;  } -GLint GetSwizzleSource(SwizzleSource source) { +GLenum TextureMode(PixelFormat format, bool is_first) { +    switch (format) { +    case PixelFormat::D24_UNORM_S8_UINT: +    case PixelFormat::D32_FLOAT_S8_UINT: +        return is_first ? 
GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; +    case PixelFormat::S8_UINT_D24_UNORM: +        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; +    default: +        UNREACHABLE(); +        return GL_DEPTH_COMPONENT; +    } +} + +GLint Swizzle(SwizzleSource source) {      switch (source) {      case SwizzleSource::Zero:          return GL_ZERO; @@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) {      case SwizzleSource::OneFloat:          return GL_ONE;      } -    UNREACHABLE(); +    UNREACHABLE_MSG("Invalid swizzle source={}", source);      return GL_NONE;  } -GLenum GetComponent(PixelFormat format, bool is_first) { -    switch (format) { -    case PixelFormat::D24_UNORM_S8_UINT: -    case PixelFormat::D32_FLOAT_S8_UINT: -        return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; -    case PixelFormat::S8_UINT_D24_UNORM: -        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; +GLenum AttachmentType(PixelFormat format) { +    switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { +    case SurfaceType::Depth: +        return GL_DEPTH_ATTACHMENT; +    case SurfaceType::DepthStencil: +        return GL_DEPTH_STENCIL_ATTACHMENT;      default: -        UNREACHABLE(); -        return GL_DEPTH_COMPONENT; +        UNIMPLEMENTED_MSG("Unimplemented type={}", type); +        return GL_NONE;      }  } -void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { -    if (params.IsBuffer()) { -        return; +[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { +    if (!device.HasASTC() && IsPixelFormatASTC(format)) { +        return true;      } -    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -    glTextureParameteri(texture, 
GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); -    if (params.num_levels == 1) { -        glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); +    switch (format) { +    case PixelFormat::BC4_UNORM: +    case PixelFormat::BC5_UNORM: +        return type == ImageType::e3D; +    default: +        break;      } +    return false;  } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, -                         OGLBuffer& texture_buffer) { -    OGLTexture texture; -    texture.Create(target); +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { +    switch (value) { +    case SwizzleSource::G: +        return SwizzleSource::R; +    default: +        return value; +    } +} -    switch (params.target) { -    case SurfaceTarget::Texture1D: -        glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); -        break; -    case SurfaceTarget::TextureBuffer: -        texture_buffer.Create(); -        glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), -                             nullptr, GL_DYNAMIC_STORAGE_BIT); -        glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); +void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) { +    switch (format) { +    case PixelFormat::D24_UNORM_S8_UINT: +    case PixelFormat::D32_FLOAT_S8_UINT: +    case PixelFormat::S8_UINT_D24_UNORM: +        UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); +        glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, +                            TextureMode(format, swizzle[0] == SwizzleSource::R)); +        std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);          break; -    case SurfaceTarget::Texture2D: -    case SurfaceTarget::TextureCubemap: -        glTextureStorage2D(texture.handle, params.emulated_levels, 
internal_format, params.width, -                           params.height); +    default:          break; -    case SurfaceTarget::Texture3D: -    case SurfaceTarget::Texture2DArray: -    case SurfaceTarget::TextureCubeArray: -        glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, -                           params.height, params.depth); +    } +    std::array<GLint, 4> gl_swizzle; +    std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); +    glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); +} + +[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, +                                    const VideoCommon::ImageInfo& info) { +    // Disable accelerated uploads for now as they don't implement swizzled uploads +    return false; +    switch (info.type) { +    case ImageType::e2D: +    case ImageType::e3D: +    case ImageType::Linear:          break;      default: -        UNREACHABLE(); +        return false; +    } +    const GLenum internal_format = GetFormatTuple(info.format).internal_format; +    const auto& format_info = runtime.FormatInfo(info.type, internal_format); +    if (format_info.is_compressed) { +        return false; +    } +    if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { +        return false;      } +    if (format_info.compatibility_by_size) { +        return true; +    } +    const GLenum store_format = StoreFormat(BytesPerBlock(info.format)); +    const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; +    return format_info.compatibility_class == store_class; +} -    ApplyTextureDefaults(params, texture.handle); +[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, +                                        VideoCommon::SubresourceLayers subresource, GLenum target) { +    switch (target) { +    case GL_TEXTURE_2D_ARRAY: +    case 
GL_TEXTURE_2D_MULTISAMPLE_ARRAY: +        return CopyOrigin{ +            .level = static_cast<GLint>(subresource.base_level), +            .x = static_cast<GLint>(offset.x), +            .y = static_cast<GLint>(offset.y), +            .z = static_cast<GLint>(subresource.base_layer), +        }; +    case GL_TEXTURE_3D: +        return CopyOrigin{ +            .level = static_cast<GLint>(subresource.base_level), +            .x = static_cast<GLint>(offset.x), +            .y = static_cast<GLint>(offset.y), +            .z = static_cast<GLint>(offset.z), +        }; +    default: +        UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); +        return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; +    } +} -    return texture; +[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, +                                        VideoCommon::SubresourceLayers dst_subresource, +                                        GLenum target) { +    switch (target) { +    case GL_TEXTURE_2D_ARRAY: +    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: +        return CopyRegion{ +            .width = static_cast<GLsizei>(extent.width), +            .height = static_cast<GLsizei>(extent.height), +            .depth = static_cast<GLsizei>(dst_subresource.num_layers), +        }; +    case GL_TEXTURE_3D: +        return CopyRegion{ +            .width = static_cast<GLsizei>(extent.width), +            .height = static_cast<GLsizei>(extent.height), +            .depth = static_cast<GLsizei>(extent.depth), +        }; +    default: +        UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); +        return CopyRegion{.width = 0, .height = 0, .depth = 0}; +    }  } -constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, -                            SwizzleSource w_source) { -    return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | -           (static_cast<u32>(z_source) << 8) | 
static_cast<u32>(w_source); +void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { +    if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { +        const GLuint texture = image_view->DefaultHandle(); +        glNamedFramebufferTexture(fbo, attachment, texture, 0); +        return; +    } +    const GLuint texture = image_view->Handle(ImageViewType::e3D); +    if (image_view->range.extent.layers > 1) { +        // TODO: OpenGL doesn't support rendering to a fixed number of slices +        glNamedFramebufferTexture(fbo, attachment, texture, 0); +    } else { +        const u32 slice = image_view->range.base.layer; +        glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); +    }  }  } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, -                             bool is_astc_supported_) -    : SurfaceBase<View>{gpu_addr_, params_, is_astc_supported_} { -    if (is_converted) { -        internal_format = params.srgb_conversion ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; -        format = GL_RGBA; -        type = GL_UNSIGNED_BYTE; -    } else { -        const auto& tuple{GetFormatTuple(params.pixel_format)}; -        internal_format = tuple.internal_format; -        format = tuple.format; -        type = tuple.type; -        is_compressed = params.IsCompressed(); -    } -    target = GetTextureTarget(params.target); -    texture = CreateTexture(params, target, internal_format, texture_buffer); -    DecorateSurfaceName(); +ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) +    : span(map, size), sync{sync_}, handle{handle_} {} -    u32 num_layers = 1; -    if (params.is_layered || params.target == SurfaceTarget::Texture3D) { -        num_layers = params.depth; +ImageBufferMap::~ImageBufferMap() { +    if (sync) { +        sync->Create();      } - -    main_view = -        CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);  } -CachedSurface::~CachedSurface() = default; +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, +                                         StateTracker& state_tracker_) +    : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { +    static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; +    for (size_t i = 0; i < TARGETS.size(); ++i) { +        const GLenum target = TARGETS[i]; +        for (const FormatTuple& tuple : FORMAT_TABLE) { +            const GLenum format = tuple.internal_format; +            GLint compat_class; +            GLint compat_type; +            GLint is_compressed; +            glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); +            glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, +                                  &compat_type); +            glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, 
&is_compressed); +            const FormatProperties properties{ +                .compatibility_class = static_cast<GLenum>(compat_class), +                .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, +                .is_compressed = is_compressed == GL_TRUE, +            }; +            format_properties[i].emplace(format, properties); +        } +    } +    null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); +    null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); +    null_image_3d.Create(GL_TEXTURE_3D); +    null_image_rect.Create(GL_TEXTURE_RECTANGLE); +    glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); +    glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); +    glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); +    glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); + +    std::array<GLuint, 4> new_handles; +    glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); +    null_image_view_1d.handle = new_handles[0]; +    null_image_view_2d.handle = new_handles[1]; +    null_image_view_2d_array.handle = new_handles[2]; +    null_image_view_cube.handle = new_handles[3]; +    glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, +                  0, 1); +    glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, +                  1, 0, 1); +    glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, +                  null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); +    glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, +                  GL_R8, 0, 1, 0, 6); +    const std::array texture_handles{ +        null_image_1d_array.handle,      null_image_cube_array.handle, null_image_3d.handle, +        null_image_rect.handle,          null_image_view_1d.handle,    null_image_view_2d.handle, +        
null_image_view_2d_array.handle, null_image_view_cube.handle, +    }; +    for (const GLuint handle : texture_handles) { +        static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; +        glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); +    } +    const auto set_view = [this](ImageViewType type, GLuint handle) { +        if (device.HasDebuggingToolAttached()) { +            const std::string name = fmt::format("NullImage {}", type); +            glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); +        } +        null_image_views[static_cast<size_t>(type)] = handle; +    }; +    set_view(ImageViewType::e1D, null_image_view_1d.handle); +    set_view(ImageViewType::e2D, null_image_view_2d.handle); +    set_view(ImageViewType::Cube, null_image_view_cube.handle); +    set_view(ImageViewType::e3D, null_image_3d.handle); +    set_view(ImageViewType::e1DArray, null_image_1d_array.handle); +    set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); +    set_view(ImageViewType::CubeArray, null_image_cube_array.handle); +    set_view(ImageViewType::Rect, null_image_rect.handle); +} -void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { -    MICROPROFILE_SCOPE(OpenGL_Texture_Download); +TextureCacheRuntime::~TextureCacheRuntime() = default; -    if (params.IsBuffer()) { -        glGetNamedBufferSubData(texture_buffer.handle, 0, -                                static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), -                                staging_buffer.data()); -        return; -    } +void TextureCacheRuntime::Finish() { +    glFinish(); +} -    SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { +    return upload_buffers.RequestMap(size, true); +} -    for (u32 level = 0; level < params.emulated_levels; ++level) { -        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, 
params.GetRowAlignment(level, is_converted))); -        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); -        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); +ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { +    return download_buffers.RequestMap(size, false); +} -        u8* const mip_data = staging_buffer.data() + mip_offset; -        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); -        if (is_compressed) { -            glGetCompressedTextureImage(texture.handle, level, size, mip_data); -        } else { -            glGetTextureImage(texture.handle, level, format, type, size, mip_data); -        } +void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, +                                    std::span<const ImageCopy> copies) { +    const GLuint dst_name = dst_image.Handle(); +    const GLuint src_name = src_image.Handle(); +    const GLenum dst_target = ImageTarget(dst_image.info); +    const GLenum src_target = ImageTarget(src_image.info); +    for (const ImageCopy& copy : copies) { +        const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); +        const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); +        const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); +        glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, +                           src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, +                           dst_origin.y, dst_origin.z, region.width, region.height, region.depth);      }  } -void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { -    MICROPROFILE_SCOPE(OpenGL_Texture_Upload); -    SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); -    for (u32 level = 0; level < params.emulated_levels; ++level) { -       
 UploadTextureMipmap(level, staging_buffer); +bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { +    if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { +        return false;      } +    return true;  } -void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { -    glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); -    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - -    const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); -    const u8* buffer{staging_buffer.data() + mip_offset}; -    if (is_compressed) { -        const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; -        switch (params.target) { -        case SurfaceTarget::Texture2D: -            glCompressedTextureSubImage2D(texture.handle, level, 0, 0, -                                          static_cast<GLsizei>(params.GetMipWidth(level)), -                                          static_cast<GLsizei>(params.GetMipHeight(level)), -                                          internal_format, image_size, buffer); -            break; -        case SurfaceTarget::Texture3D: -        case SurfaceTarget::Texture2DArray: -        case SurfaceTarget::TextureCubeArray: -            glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, -                                          static_cast<GLsizei>(params.GetMipWidth(level)), -                                          static_cast<GLsizei>(params.GetMipHeight(level)), -                                          static_cast<GLsizei>(params.GetMipDepth(level)), -                                          internal_format, image_size, buffer); -            break; -        case SurfaceTarget::TextureCubemap: { -            const std::size_t host_layer_size{params.GetHostLayerSize(level)}; -            for (std::size_t face = 0; 
face < params.depth; ++face) { -                glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), -                                              static_cast<GLsizei>(params.GetMipWidth(level)), -                                              static_cast<GLsizei>(params.GetMipHeight(level)), 1, -                                              internal_format, -                                              static_cast<GLsizei>(host_layer_size), buffer); -                buffer += host_layer_size; -            } -            break; -        } -        default: -            UNREACHABLE(); -        } +void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, +                                           std::span<const ImageCopy> copies) { +    if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { +        ASSERT(src.info.type == ImageType::e3D); +        util_shaders.CopyBC4(dst, src, copies);      } else { -        switch (params.target) { -        case SurfaceTarget::Texture1D: -            glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, -                                buffer); -            break; -        case SurfaceTarget::TextureBuffer: -            ASSERT(level == 0); -            glNamedBufferSubData(texture_buffer.handle, 0, -                                 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); -            break; -        case SurfaceTarget::Texture1DArray: -        case SurfaceTarget::Texture2D: -            glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), -                                params.GetMipHeight(level), format, type, buffer); -            break; -        case SurfaceTarget::Texture3D: -        case SurfaceTarget::Texture2DArray: -        case SurfaceTarget::TextureCubeArray: -            glTextureSubImage3D( -                texture.handle, level, 0, 0, 0, 
static_cast<GLsizei>(params.GetMipWidth(level)), -                static_cast<GLsizei>(params.GetMipHeight(level)), -                static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer); -            break; -        case SurfaceTarget::TextureCubemap: -            for (std::size_t face = 0; face < params.depth; ++face) { -                glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), -                                    params.GetMipWidth(level), params.GetMipHeight(level), 1, -                                    format, type, buffer); -                buffer += params.GetHostLayerSize(level); -            } -            break; -        default: -            UNREACHABLE(); -        } +        UNREACHABLE();      }  } -void CachedSurface::DecorateSurfaceName() { -    LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); -} +void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, +                                          const std::array<Offset2D, 2>& dst_region, +                                          const std::array<Offset2D, 2>& src_region, +                                          Tegra::Engines::Fermi2D::Filter filter, +                                          Tegra::Engines::Fermi2D::Operation operation) { +    state_tracker.NotifyScissor0(); +    state_tracker.NotifyRasterizeEnable(); +    state_tracker.NotifyFramebufferSRGB(); -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { -    LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); +    ASSERT(dst->BufferBits() == src->BufferBits()); + +    glEnable(GL_FRAMEBUFFER_SRGB); +    glDisable(GL_RASTERIZER_DISCARD); +    glDisablei(GL_SCISSOR_TEST, 0); + +    const GLbitfield buffer_bits = dst->BufferBits(); +    const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; +    const bool is_linear = !has_depth && filter == 
Tegra::Engines::Fermi2D::Filter::Bilinear; +    glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, +                           src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, +                           dst_region[1].x, dst_region[1].y, buffer_bits, +                           is_linear ? GL_LINEAR : GL_NEAREST);  } -View CachedSurface::CreateView(const ViewParams& view_key) { -    return CreateViewInner(view_key, false); +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, +                                                size_t buffer_offset, +                                                std::span<const SwizzleParameters> swizzles) { +    switch (image.info.type) { +    case ImageType::e2D: +        return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); +    case ImageType::e3D: +        return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); +    case ImageType::Linear: +        return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); +    default: +        UNREACHABLE(); +        break; +    }  } -View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { -    auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); -    views[view_key] = view; -    if (!is_proxy) -        view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); -    return view; +void TextureCacheRuntime::InsertUploadMemoryBarrier() { +    glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);  } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, -                                     bool is_proxy_) -    : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, -      target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { -    if (!is_proxy_) { -        main_view = 
CreateTextureView(); +FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { +    switch (type) { +    case ImageType::e1D: +        return format_properties[0].at(internal_format); +    case ImageType::e2D: +    case ImageType::Linear: +        return format_properties[1].at(internal_format); +    case ImageType::e3D: +        return format_properties[2].at(internal_format); +    default: +        UNREACHABLE(); +        return FormatProperties{};      }  } -CachedSurfaceView::~CachedSurfaceView() = default; +TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) +    : storage_flags{storage_flags_}, map_flags{map_flags_} {} -void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { -    ASSERT(params.num_levels == 1); +TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; -    if (params.target == SurfaceTarget::Texture3D) { -        if (params.num_layers > 1) { -            ASSERT(params.base_layer == 0); -            glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); -        } else { -            glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, -                                   params.base_level, params.base_layer); -        } -        return; +ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, +                                                               bool insert_fence) { +    const size_t index = RequestBuffer(requested_size); +    OGLSync* const sync = insert_fence ? 
&syncs[index] : nullptr; +    return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); +} + +size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { +    if (const std::optional<size_t> index = FindBuffer(requested_size); index) { +        return *index;      } -    if (params.num_layers > 1) { -        UNIMPLEMENTED_IF(params.base_layer != 0); -        glFramebufferTexture(fb_target, attachment, GetTexture(), 0); -        return; +    OGLBuffer& buffer = buffers.emplace_back(); +    buffer.Create(); +    glNamedBufferStorage(buffer.handle, requested_size, nullptr, +                         storage_flags | GL_MAP_PERSISTENT_BIT); +    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, +                                                          map_flags | GL_MAP_PERSISTENT_BIT))); + +    syncs.emplace_back(); +    sizes.push_back(requested_size); + +    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && +           maps.size() == sizes.size()); + +    return buffers.size() - 1; +} + +std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { +    size_t smallest_buffer = std::numeric_limits<size_t>::max(); +    std::optional<size_t> found; +    const size_t num_buffers = sizes.size(); +    for (size_t index = 0; index < num_buffers; ++index) { +        const size_t buffer_size = sizes[index]; +        if (buffer_size < requested_size || buffer_size >= smallest_buffer) { +            continue; +        } +        if (syncs[index].handle != 0) { +            GLint status; +            glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); +            if (status != GL_SIGNALED) { +                continue; +            } +            syncs[index].Release(); +        } +        smallest_buffer = buffer_size; +        found = index;      } +    return found; +} -    const GLenum view_target = 
surface.GetTarget(); -    const GLuint texture = surface.GetTexture(); -    switch (surface.GetSurfaceParams().target) { -    case SurfaceTarget::Texture1D: -        glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); +Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, +             VAddr cpu_addr_) +    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { +    if (CanBeAccelerated(runtime, info)) { +        flags |= ImageFlagBits::AcceleratedUpload; +    } +    if (IsConverted(runtime.device, info.format, info.type)) { +        flags |= ImageFlagBits::Converted; +        gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; +        gl_store_format = GL_RGBA8; +        gl_format = GL_RGBA; +        gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; +    } else { +        const auto& tuple = GetFormatTuple(info.format); +        gl_internal_format = tuple.internal_format; +        gl_store_format = tuple.store_format; +        gl_format = tuple.format; +        gl_type = tuple.type; +    } +    const GLenum target = ImageTarget(info); +    const GLsizei width = info.size.width; +    const GLsizei height = info.size.height; +    const GLsizei depth = info.size.depth; +    const int max_host_mip_levels = std::bit_width(info.size.width); +    const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); +    const GLsizei num_layers = info.resources.layers; +    const GLsizei num_samples = info.num_samples; + +    GLuint handle = 0; +    if (target != GL_TEXTURE_BUFFER) { +        texture.Create(target); +        handle = texture.handle; +    } +    switch (target) { +    case GL_TEXTURE_1D_ARRAY: +        glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers);          break; -    case SurfaceTarget::Texture2D: -        glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); +    case 
GL_TEXTURE_2D_ARRAY: +        glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers);          break; -    case SurfaceTarget::Texture1DArray: -    case SurfaceTarget::Texture2DArray: -    case SurfaceTarget::TextureCubemap: -    case SurfaceTarget::TextureCubeArray: -        glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, -                                  params.base_layer); +    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { +        // TODO: Where should 'fixedsamplelocations' come from? +        const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); +        glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, +                                      height >> samples_y, num_layers, GL_FALSE); +        break; +    } +    case GL_TEXTURE_RECTANGLE: +        glTextureStorage2D(handle, num_levels, gl_store_format, width, height); +        break; +    case GL_TEXTURE_3D: +        glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); +        break; +    case GL_TEXTURE_BUFFER: +        buffer.Create(); +        glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);          break;      default: -        UNIMPLEMENTED(); +        UNREACHABLE_MSG("Invalid target=0x{:x}", target); +        break; +    } +    if (runtime.device.HasDebuggingToolAttached()) { +        const std::string name = VideoCommon::Name(*this); +        glObjectLabel(target == GL_TEXTURE_BUFFER ? 
GL_BUFFER : GL_TEXTURE, handle, +                      static_cast<GLsizei>(name.size()), name.data());      }  } -GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, -                                     SwizzleSource z_source, SwizzleSource w_source) { -    if (GetSurfaceParams().IsBuffer()) { -        return GetTexture(); -    } -    const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); -    if (current_swizzle == new_swizzle) { -        return current_view; -    } -    current_swizzle = new_swizzle; +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                         std::span<const VideoCommon::BufferImageCopy> copies) { +    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); +    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); -    const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); -    OGLTextureView& view = entry->second; -    if (!is_cache_miss) { -        current_view = view.handle; -        return view.handle; -    } -    view = CreateTextureView(); -    current_view = view.handle; +    glPixelStorei(GL_UNPACK_ALIGNMENT, 1); -    std::array swizzle{x_source, y_source, z_source, w_source}; +    u32 current_row_length = std::numeric_limits<u32>::max(); +    u32 current_image_height = std::numeric_limits<u32>::max(); -    switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) { -    case PixelFormat::D24_UNORM_S8_UINT: -    case PixelFormat::D32_FLOAT_S8_UINT: -    case PixelFormat::S8_UINT_D24_UNORM: -        UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); -        glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, -                            GetComponent(pixel_format, x_source == SwizzleSource::R)); - -        // Make sure we sample the first component -        std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource 
value) { -            return value == SwizzleSource::G ? SwizzleSource::R : value; -        }); -        [[fallthrough]]; -    default: { -        const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), -                                       GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; -        glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); -        break; -    } +    for (const VideoCommon::BufferImageCopy& copy : copies) { +        if (current_row_length != copy.buffer_row_length) { +            current_row_length = copy.buffer_row_length; +            glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); +        } +        if (current_image_height != copy.buffer_image_height) { +            current_image_height = copy.buffer_image_height; +            glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); +        } +        CopyBufferToImage(copy, buffer_offset);      } -    return view.handle;  } -OGLTextureView CachedSurfaceView::CreateTextureView() const { -    OGLTextureView texture_view; -    texture_view.Create(); - -    if (target == GL_TEXTURE_3D) { -        glTextureView(texture_view.handle, target, surface.texture.handle, format, -                      params.base_level, params.num_levels, 0, 1); -    } else { -        glTextureView(texture_view.handle, target, surface.texture.handle, format, -                      params.base_level, params.num_levels, params.base_layer, params.num_layers); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                         std::span<const VideoCommon::BufferCopy> copies) { +    for (const VideoCommon::BufferCopy& copy : copies) { +        glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, +                                 copy.dst_offset, copy.size);      } -    ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); - -    return 
texture_view;  } -TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, -                                       Tegra::Engines::Maxwell3D& maxwell3d_, -                                       Tegra::MemoryManager& gpu_memory_, const Device& device_, -                                       StateTracker& state_tracker_) -    : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, -      state_tracker{state_tracker_} { -    src_framebuffer.Create(); -    dst_framebuffer.Create(); -} +void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, +                           std::span<const VideoCommon::BufferImageCopy> copies) { +    glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API -TextureCacheOpenGL::~TextureCacheOpenGL() = default; +    glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); +    glPixelStorei(GL_PACK_ALIGNMENT, 1); -Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { -    return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); -} +    u32 current_row_length = std::numeric_limits<u32>::max(); +    u32 current_image_height = std::numeric_limits<u32>::max(); -void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, -                                   const VideoCommon::CopyParams& copy_params) { -    const auto& src_params = src_surface->GetSurfaceParams(); -    const auto& dst_params = dst_surface->GetSurfaceParams(); -    if (src_params.type != dst_params.type) { -        // A fallback is needed -        return; +    for (const VideoCommon::BufferImageCopy& copy : copies) { +        if (current_row_length != copy.buffer_row_length) { +            current_row_length = copy.buffer_row_length; +            glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); +        } +        if (current_image_height != copy.buffer_image_height) { +            current_image_height = 
copy.buffer_image_height; +            glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); +        } +        CopyImageToBuffer(copy, buffer_offset);      } -    const auto src_handle = src_surface->GetTexture(); -    const auto src_target = src_surface->GetTarget(); -    const auto dst_handle = dst_surface->GetTexture(); -    const auto dst_target = dst_surface->GetTarget(); -    glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, -                       copy_params.source_y, copy_params.source_z, dst_handle, dst_target, -                       copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, -                       copy_params.dest_z, copy_params.width, copy_params.height, -                       copy_params.depth);  } -void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, -                                   const Tegra::Engines::Fermi2D::Config& copy_config) { -    const auto& src_params{src_view->GetSurfaceParams()}; -    const auto& dst_params{dst_view->GetSurfaceParams()}; -    UNIMPLEMENTED_IF(src_params.depth != 1); -    UNIMPLEMENTED_IF(dst_params.depth != 1); - -    state_tracker.NotifyScissor0(); -    state_tracker.NotifyFramebuffer(); -    state_tracker.NotifyRasterizeEnable(); -    state_tracker.NotifyFramebufferSRGB(); +void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { +    // Compressed formats don't have a pixel format or type +    const bool is_compressed = gl_format == GL_NONE; +    const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset); -    if (dst_params.srgb_conversion) { -        glEnable(GL_FRAMEBUFFER_SRGB); -    } else { -        glDisable(GL_FRAMEBUFFER_SRGB); +    switch (info.type) { +    case ImageType::e1D: +        if (is_compressed) { +            glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, +                                          
copy.image_offset.x, copy.image_subresource.base_layer, +                                          copy.image_extent.width, +                                          copy.image_subresource.num_layers, gl_internal_format, +                                          static_cast<GLsizei>(copy.buffer_size), offset); +        } else { +            glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, +                                copy.image_offset.x, copy.image_subresource.base_layer, +                                copy.image_extent.width, copy.image_subresource.num_layers, +                                gl_format, gl_type, offset); +        } +        break; +    case ImageType::e2D: +    case ImageType::Linear: +        if (is_compressed) { +            glCompressedTextureSubImage3D( +                texture.handle, copy.image_subresource.base_level, copy.image_offset.x, +                copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, +                copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, +                static_cast<GLsizei>(copy.buffer_size), offset); +        } else { +            glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, +                                copy.image_offset.x, copy.image_offset.y, +                                copy.image_subresource.base_layer, copy.image_extent.width, +                                copy.image_extent.height, copy.image_subresource.num_layers, +                                gl_format, gl_type, offset); +        } +        break; +    case ImageType::e3D: +        if (is_compressed) { +            glCompressedTextureSubImage3D( +                texture.handle, copy.image_subresource.base_level, copy.image_offset.x, +                copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, +                copy.image_extent.height, copy.image_extent.depth, gl_internal_format, +                
static_cast<GLsizei>(copy.buffer_size), offset); +        } else { +            glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, +                                copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, +                                copy.image_extent.width, copy.image_extent.height, +                                copy.image_extent.depth, gl_format, gl_type, offset); +        } +        break; +    default: +        UNREACHABLE();      } -    glDisable(GL_RASTERIZER_DISCARD); -    glDisablei(GL_SCISSOR_TEST, 0); - -    glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); -    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); - -    GLenum buffers = 0; -    if (src_params.type == SurfaceType::ColorTexture) { -        src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, -                               0); - -        dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, -                               0); - -        buffers = GL_COLOR_BUFFER_BIT; -    } else if (src_params.type == SurfaceType::Depth) { -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -        src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +} -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -        dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { +    const GLint x_offset = copy.image_offset.x; +    const GLsizei 
width = copy.image_extent.width; -        buffers = GL_DEPTH_BUFFER_BIT; -    } else if (src_params.type == SurfaceType::DepthStencil) { -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -        src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); +    const GLint level = copy.image_subresource.base_level; +    const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size); +    void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -        dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); +    GLint y_offset = 0; +    GLint z_offset = 0; +    GLsizei height = 1; +    GLsizei depth = 1; -        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; +    switch (info.type) { +    case ImageType::e1D: +        y_offset = copy.image_subresource.base_layer; +        height = copy.image_subresource.num_layers; +        break; +    case ImageType::e2D: +    case ImageType::Linear: +        y_offset = copy.image_offset.y; +        z_offset = copy.image_subresource.base_layer; +        height = copy.image_extent.height; +        depth = copy.image_subresource.num_layers; +        break; +    case ImageType::e3D: +        y_offset = copy.image_offset.y; +        z_offset = copy.image_offset.z; +        height = copy.image_extent.height; +        depth = copy.image_extent.depth; +        break; +    default: +        UNREACHABLE(); +    } +    // Compressed formats don't have a pixel format or type +    const bool is_compressed = gl_format == GL_NONE; +    if (is_compressed) { +        glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, +                                       height, depth, buffer_size, offset); +    } else { +        glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, +       
                      depth, gl_format, gl_type, buffer_size, offset);      } - -    const Common::Rectangle<u32>& src_rect = copy_config.src_rect; -    const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; -    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - -    glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), -                      static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), -                      static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), -                      static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), -                      buffers, -                      is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);  } -void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { -    MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); -    const auto& src_params = src_surface->GetSurfaceParams(); -    const auto& dst_params = dst_surface->GetSurfaceParams(); -    UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, +                     ImageId image_id_, Image& image) +    : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { +    const Device& device = runtime.device; +    if (True(image.flags & ImageFlagBits::Converted)) { +        internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; +    } else { +        internal_format = GetFormatTuple(format).internal_format; +    } +    VideoCommon::SubresourceRange flatten_range = info.range; +    std::array<GLuint, 2> handles; +    stored_views.reserve(2); -    const auto source_format = GetFormatTuple(src_params.pixel_format); -    const auto dest_format = GetFormatTuple(dst_params.pixel_format); +    switch (info.type) { +    case ImageViewType::e1DArray: +        flatten_range.extent.layers = 1; +        [[fallthrough]]; +    case ImageViewType::e1D: +        glGenTextures(2, handles.data()); +        SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); +        SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); +        break; +    case ImageViewType::e2DArray: +        flatten_range.extent.layers = 1; +        [[fallthrough]]; +    case ImageViewType::e2D: +        if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { +            // 2D and 2D array views on a 3D textures are used exclusively for render targets +            ASSERT(info.range.extent.levels == 1); +            const VideoCommon::SubresourceRange slice_range{ +                .base = {.level = info.range.base.level, .layer = 0}, +                .extent = {.levels = 1, .layers = 1}, +            }; +            glGenTextures(1, handles.data()); +            SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); +            break; +        } +        glGenTextures(2, handles.data()); +        SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); +        SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); +        break; +    case ImageViewType::e3D: +        glGenTextures(1, handles.data()); +        SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); +        break; +    case ImageViewType::CubeArray: +        flatten_range.extent.layers = 6; +     
   [[fallthrough]]; +    case ImageViewType::Cube: +        glGenTextures(2, handles.data()); +        SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); +        SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); +        break; +    case ImageViewType::Rect: +        glGenTextures(1, handles.data()); +        SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); +        break; +    case ImageViewType::Buffer: +        glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); +        SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); +        break; +    } +    default_handle = Handle(info.type); +} -    const std::size_t source_size = src_surface->GetHostSizeInBytes(); -    const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) +    : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -    const std::size_t buffer_size = std::max(source_size, dest_size); +void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +                          GLuint handle, const VideoCommon::ImageViewInfo& info, +                          VideoCommon::SubresourceRange view_range) { +    if (info.type == ImageViewType::Buffer) { +        // TODO: Take offset from buffer cache +        glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, +                             image.guest_size_bytes); +    } else { +        const GLuint parent = image.texture.handle; +        const GLenum target = ImageTarget(view_type, image.info.num_samples); +        glTextureView(handle, target, parent, internal_format, view_range.base.level, +                      view_range.extent.levels, view_range.base.layer, view_range.extent.layers); +        if (!info.IsRenderTarget()) { +            ApplySwizzle(handle, format, 
info.Swizzle()); +        } +    } +    if (device.HasDebuggingToolAttached()) { +        const std::string name = VideoCommon::Name(*this, view_type); +        glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); +    } +    stored_views.emplace_back().handle = handle; +    views[static_cast<size_t>(view_type)] = handle; +} -    GLuint copy_pbo_handle = FetchPBO(buffer_size); +Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { +    const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE; +    const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); +    const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); +    const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); +    const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); +    const GLint seamless = config.cubemap_interface_filtering ? 
GL_TRUE : GL_FALSE; + +    UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); +    UNIMPLEMENTED_IF(config.float_coord_normalization != 0); + +    sampler.Create(); +    const GLuint handle = sampler.handle; +    glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); +    glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); +    glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); +    glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); +    glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); +    glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); +    glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); +    glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); +    glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); +    glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); +    glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + +    if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { +        glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); +    } else { +        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); +    } +    if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { +        glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); +    } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { +        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); +    } +    if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { +        glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); +    } else if (seamless == GL_FALSE) { +        // We default to false because it's more common +        LOG_WARNING(Render_OpenGL, 
"GL_ARB_seamless_cubemap_per_texture is required"); +    } +} -    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, +                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { +    // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of +    // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared +    // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with +    // mismatching size, this is why core framebuffers are preferred. +    GLuint handle; +    glGenFramebuffers(1, &handle); +    glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + +    GLsizei num_buffers = 0; +    std::array<GLenum, NUM_RT> gl_draw_buffers; +    gl_draw_buffers.fill(GL_NONE); + +    for (size_t index = 0; index < color_buffers.size(); ++index) { +        const ImageView* const image_view = color_buffers[index]; +        if (!image_view) { +            continue; +        } +        buffer_bits |= GL_COLOR_BUFFER_BIT; +        gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; +        num_buffers = static_cast<GLsizei>(index + 1); -    if (src_surface->IsCompressed()) { -        glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), -                                    nullptr); -    } else { -        glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, -                          static_cast<GLsizei>(source_size), nullptr); +        const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index); +        AttachTexture(handle, attachment, image_view);      } -    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); -    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); +    if (const ImageView* const image_view = depth_buffer; image_view) { +       
 if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { +            buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; +        } else { +            buffer_bits |= GL_DEPTH_BUFFER_BIT; +        } +        const GLenum attachment = AttachmentType(image_view->format); +        AttachTexture(handle, attachment, image_view); +    } -    const GLsizei width = static_cast<GLsizei>(dst_params.width); -    const GLsizei height = static_cast<GLsizei>(dst_params.height); -    const GLsizei depth = static_cast<GLsizei>(dst_params.depth); -    if (dst_surface->IsCompressed()) { -        LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); -        UNREACHABLE(); +    if (num_buffers > 1) { +        glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); +    } else if (num_buffers > 0) { +        glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);      } else { -        switch (dst_params.target) { -        case SurfaceTarget::Texture1D: -            glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, -                                dest_format.type, nullptr); -            break; -        case SurfaceTarget::Texture2D: -            glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, -                                dest_format.format, dest_format.type, nullptr); -            break; -        case SurfaceTarget::Texture3D: -        case SurfaceTarget::Texture2DArray: -        case SurfaceTarget::TextureCubeArray: -            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, -                                dest_format.format, dest_format.type, nullptr); -            break; -        case SurfaceTarget::TextureCubemap: -            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, -                                dest_format.format, dest_format.type, nullptr); -            break; -        default: -          
  LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); -            UNREACHABLE(); -        } +        glNamedFramebufferDrawBuffer(handle, GL_NONE);      } -    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); -    glTextureBarrier(); -} +    glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); +    glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height); +    // TODO +    // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); +    // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); +    // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); -GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { -    ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); -    const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); -    OGLBuffer& cp = copy_pbo_cache[l2]; -    if (cp.handle == 0) { -        const std::size_t ceil_size = 1ULL << l2; -        cp.Create(); -        cp.MakeStreamCopy(ceil_size); +    if (runtime.device.HasDebuggingToolAttached()) { +        const std::string name = VideoCommon::Name(key); +        glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());      } -    return cp.handle; +    framebuffer.handle = handle;  }  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 72b284fab..04193e31e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -4,157 +4,247 @@  #pragma once -#include <array> -#include <functional>  #include <memory> -#include <unordered_map> -#include <utility> -#include <vector> +#include <span>  #include <glad/glad.h> -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/renderer_opengl/gl_device.h"  #include 
"video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/util_shaders.h"  #include "video_core/texture_cache/texture_cache.h"  namespace OpenGL { -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; - -class CachedSurfaceView; -class CachedSurface; -class TextureCacheOpenGL; +class Device; +class ProgramManager;  class StateTracker; -using Surface = std::shared_ptr<CachedSurface>; -using View = std::shared_ptr<CachedSurfaceView>; -using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; +class Framebuffer; +class Image; +class ImageView; +class Sampler; -class CachedSurface final : public VideoCommon::SurfaceBase<View> { -    friend CachedSurfaceView; +using VideoCommon::ImageId; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +class ImageBufferMap {  public: -    explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, -                           bool is_astc_supported_); -    ~CachedSurface(); - -    void UploadTexture(const std::vector<u8>& staging_buffer) override; -    void DownloadTexture(std::vector<u8>& staging_buffer) override; +    explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); +    ~ImageBufferMap(); -    GLenum GetTarget() const { -        return target; +    GLuint Handle() const noexcept { +        return handle;      } -    GLuint GetTexture() const { -        return texture.handle; +    std::span<u8> Span() const noexcept { +        return span;      } -    bool IsCompressed() const { -        return is_compressed; +private: +    std::span<u8> span; +    OGLSync* sync; +    GLuint handle; +}; + +struct FormatProperties { +    GLenum compatibility_class; +    bool compatibility_by_size; +    bool is_compressed; +}; + +class TextureCacheRuntime { +    friend Framebuffer; +    friend Image; +    friend ImageView; +    friend Sampler; + 
+public: +    explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, +                                 StateTracker& state_tracker); +    ~TextureCacheRuntime(); + +    void Finish(); + +    ImageBufferMap MapUploadBuffer(size_t size); + +    ImageBufferMap MapDownloadBuffer(size_t size); + +    void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + +    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { +        UNIMPLEMENTED();      } -protected: -    void DecorateSurfaceName() override; +    bool CanImageBeCopied(const Image& dst, const Image& src); + +    void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + +    void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, +                         const std::array<Offset2D, 2>& dst_region, +                         const std::array<Offset2D, 2>& src_region, +                         Tegra::Engines::Fermi2D::Filter filter, +                         Tegra::Engines::Fermi2D::Operation operation); -    View CreateView(const ViewParams& view_key) override; -    View CreateViewInner(const ViewParams& view_key, bool is_proxy); +    void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                               std::span<const VideoCommon::SwizzleParameters> swizzles); + +    void InsertUploadMemoryBarrier(); + +    FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;  private: -    void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer); +    struct StagingBuffers { +        explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); +        ~StagingBuffers(); -    GLenum internal_format{}; -    GLenum format{}; -    GLenum type{}; -    bool is_compressed{}; -    GLenum target{}; -    u32 view_count{}; +        ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); -    
OGLTexture texture; -    OGLBuffer texture_buffer; +        size_t RequestBuffer(size_t requested_size); + +        std::optional<size_t> FindBuffer(size_t requested_size); + +        std::vector<OGLSync> syncs; +        std::vector<OGLBuffer> buffers; +        std::vector<u8*> maps; +        std::vector<size_t> sizes; +        GLenum storage_flags; +        GLenum map_flags; +    }; + +    const Device& device; +    StateTracker& state_tracker; +    UtilShaders util_shaders; + +    std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; + +    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; +    StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; + +    OGLTexture null_image_1d_array; +    OGLTexture null_image_cube_array; +    OGLTexture null_image_3d; +    OGLTexture null_image_rect; +    OGLTextureView null_image_view_1d; +    OGLTextureView null_image_view_2d; +    OGLTextureView null_image_view_2d_array; +    OGLTextureView null_image_view_cube; + +    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;  }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class Image : public VideoCommon::ImageBase { +    friend ImageView; +  public: -    explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); -    ~CachedSurfaceView(); +    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, +                   VAddr cpu_addr); -    /// @brief Attaches this texture view to the currently bound fb_target framebuffer -    /// @param attachment   Attachment to bind textures to -    /// @param fb_target    Framebuffer target to attach to (e.g. 
DRAW_FRAMEBUFFER) -    void Attach(GLenum attachment, GLenum fb_target) const; +    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                      std::span<const VideoCommon::BufferImageCopy> copies); -    GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, -                      Tegra::Texture::SwizzleSource y_source, -                      Tegra::Texture::SwizzleSource z_source, -                      Tegra::Texture::SwizzleSource w_source); +    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                      std::span<const VideoCommon::BufferCopy> copies); -    void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); +    void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, +                        std::span<const VideoCommon::BufferImageCopy> copies); -    void MarkAsModified(u64 tick) { -        surface.MarkAsModified(true, tick); +    GLuint Handle() const noexcept { +        return texture.handle;      } -    GLuint GetTexture() const { -        if (is_proxy) { -            return surface.GetTexture(); -        } -        return main_view.handle; +private: +    void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + +    void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + +    OGLTexture texture; +    OGLTextureView store_view; +    OGLBuffer buffer; +    GLenum gl_internal_format = GL_NONE; +    GLenum gl_store_format = GL_NONE; +    GLenum gl_format = GL_NONE; +    GLenum gl_type = GL_NONE; +}; + +class ImageView : public VideoCommon::ImageViewBase { +    friend Image; + +public: +    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); +    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + +    [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { +        return views[static_cast<size_t>(query_type)];      } -    
GLenum GetFormat() const { -        return format; +    [[nodiscard]] GLuint DefaultHandle() const noexcept { +        return default_handle;      } -    const SurfaceParams& GetSurfaceParams() const { -        return surface.GetSurfaceParams(); +    [[nodiscard]] GLenum Format() const noexcept { +        return internal_format;      }  private: -    OGLTextureView CreateTextureView() const; +    void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, +                   const VideoCommon::ImageViewInfo& info, +                   VideoCommon::SubresourceRange view_range); + +    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; +    std::vector<OGLTextureView> stored_views; +    GLuint default_handle = 0; +    GLenum internal_format = GL_NONE; +}; + +class ImageAlloc : public VideoCommon::ImageAllocBase {}; -    CachedSurface& surface; -    const GLenum format; -    const GLenum target; -    const bool is_proxy; +class Sampler { +public: +    explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); -    std::unordered_map<u32, OGLTextureView> view_cache; -    OGLTextureView main_view; +    GLuint Handle() const noexcept { +        return sampler.handle; +    } -    // Use an invalid default so it always fails the comparison test -    u32 current_swizzle = 0xffffffff; -    GLuint current_view = 0; +private: +    OGLSampler sampler;  }; -class TextureCacheOpenGL final : public TextureCacheBase { +class Framebuffer {  public: -    explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, -                                Tegra::Engines::Maxwell3D& maxwell3d_, -                                Tegra::MemoryManager& gpu_memory_, const Device& device_, -                                StateTracker& state_tracker); -    ~TextureCacheOpenGL(); - -protected: -    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - -    void ImageCopy(Surface& src_surface, Surface& 
dst_surface, -                   const VideoCommon::CopyParams& copy_params) override; +    explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, +                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key); -    void ImageBlit(View& src_view, View& dst_view, -                   const Tegra::Engines::Fermi2D::Config& copy_config) override; +    [[nodiscard]] GLuint Handle() const noexcept { +        return framebuffer.handle; +    } -    void BufferCopy(Surface& src_surface, Surface& dst_surface) override; +    [[nodiscard]] GLbitfield BufferBits() const noexcept { +        return buffer_bits; +    }  private: -    GLuint FetchPBO(std::size_t buffer_size); - -    StateTracker& state_tracker; +    OGLFramebuffer framebuffer; +    GLbitfield buffer_bits = GL_NONE; +}; -    OGLFramebuffer src_framebuffer; -    OGLFramebuffer dst_framebuffer; -    std::unordered_map<u32, OGLBuffer> copy_pbo_cache; +struct TextureCacheParams { +    static constexpr bool ENABLE_VALIDATION = true; +    static constexpr bool FRAMEBUFFER_BLITS = true; +    static constexpr bool HAS_EMULATED_COPIES = true; + +    using Runtime = OpenGL::TextureCacheRuntime; +    using Image = OpenGL::Image; +    using ImageAlloc = OpenGL::ImageAlloc; +    using ImageView = OpenGL::ImageView; +    using Sampler = OpenGL::Sampler; +    using Framebuffer = OpenGL::Framebuffer;  }; +using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; +  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index dd4ee3361..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {      return GL_FILL;  } +inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { +    switch (filter) { +    case 
Tegra::Texture::SamplerReduction::WeightedAverage: +        return GL_WEIGHTED_AVERAGE_ARB; +    case Tegra::Texture::SamplerReduction::Min: +        return GL_MIN; +    case Tegra::Texture::SamplerReduction::Max: +        return GL_MAX; +    } +    UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter)); +    return GL_WEIGHTED_AVERAGE_ARB; +} +  inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {      // Enumeration order matches register order. We can convert it arithmetically.      return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cbfaaa99c..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -23,10 +23,10 @@  #include "core/telemetry_session.h"  #include "video_core/host_shaders/opengl_present_frag.h"  #include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/morton.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_shader_manager.h"  #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/textures/decoders.h"  namespace OpenGL { @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {      if (!framebuffer) {          return;      } -      PrepareRendertarget(framebuffer);      RenderScreenshot(); -    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); +    state_tracker.BindFramebuffer(0);      DrawScreen(emu_window.GetFramebufferLayout());      ++m_current_frame; @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf      // Reset the screen info's display texture to its own permanent texture      screen_info.display_texture = screen_info.texture.resource.handle; -    const auto pixel_format{ -        
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; -    const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; -    const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; -    u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; -    rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); -      // TODO(Rodrigo): Read this from HLE      constexpr u32 block_height_log2 = 4; -    VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, -                             framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, -                             gl_framebuffer_data.data(), host_ptr); - +    const auto pixel_format{ +        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; +    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; +    const u64 size_in_bytes{Tegra::Texture::CalculateSize( +        true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; +    const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; +    const std::span<const u8> input_data(host_ptr, size_in_bytes); +    Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, +                                     framebuffer.width, framebuffer.height, 1, block_height_log2, +                                     0); + +    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);      glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));      // Update existing texture @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {      glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);      glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); +    // Generate presentation sampler +    present_sampler.Create(); +    glSamplerParameteri(present_sampler.handle, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); +      // Generate VBO handle for drawing      vertex_buffer.Create(); @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {      // Clear screen to black      LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); +    // Enable seamless cubemaps when per texture parameters are not available +    if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { +        glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); +    } +      // Enable unified vertex attributes and query vertex buffer address when the driver supports it      if (device.HasVertexBufferUnifiedMemory()) {          glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,      const auto pixel_format{          VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; -    const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; +    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};      gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);      GLint internal_format; @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,          internal_format = GL_RGBA8;          texture.gl_format = GL_RGBA;          texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; -        UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", -                          static_cast<u32>(framebuffer.pixel_format)); +        // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", +        //                   static_cast<u32>(framebuffer.pixel_format));      }      texture.resource.Release(); @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {      state_tracker.NotifyPolygonModes();      state_tracker.NotifyViewport0();      state_tracker.NotifyScissor0(); -    state_tracker.NotifyColorMask0(); + 
   state_tracker.NotifyColorMask(0);      state_tracker.NotifyBlend0();      state_tracker.NotifyFramebuffer();      state_tracker.NotifyFrontFace(); @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {      }      glBindTextureUnit(0, screen_info.display_texture); -    glBindSampler(0, 0); +    glBindSampler(0, present_sampler.handle);      glClear(GL_COLOR_BUFFER_BIT);      glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {      DrawScreen(layout); +    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +    glPixelStorei(GL_PACK_ROW_LENGTH, 0);      glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,                   renderer_settings.screenshot_bits); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 376f88766..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -102,6 +102,7 @@ private:      StateTracker state_tracker{gpu};      // OpenGL object IDs +    OGLSampler present_sampler;      OGLBuffer vertex_buffer;      OGLProgram vertex_program;      OGLProgram fragment_program; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -0,0 +1,224 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <bit> +#include <span> +#include <string_view> + +#include <glad/glad.h> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" +#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/pitch_unswizzle_comp.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +using namespace HostShaders; + +using VideoCommon::Extent3D; +using VideoCommon::ImageCopy; +using VideoCommon::ImageType; +using VideoCommon::SwizzleParameters; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; +using VideoCore::Surface::BytesPerBlock; + +namespace { + +OGLProgram MakeProgram(std::string_view source) { +    OGLShader shader; +    shader.Create(source, GL_COMPUTE_SHADER); + +    OGLProgram program; +    program.Create(true, false, shader.handle); +    return program; +} + +} // Anonymous namespace + +UtilShaders::UtilShaders(ProgramManager& program_manager_) +    : program_manager{program_manager_}, +      block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), +      block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), +      pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), +      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { +    const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); +    
swizzle_table_buffer.Create(); +    glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); +} + +UtilShaders::~UtilShaders() = default; + +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                                      std::span<const SwizzleParameters> swizzles) { +    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; +    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; +    static constexpr GLuint BINDING_INPUT_BUFFER = 1; +    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + +    program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); +    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); +    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + +    const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); +    for (const SwizzleParameters& swizzle : swizzles) { +        const Extent3D num_tiles = swizzle.num_tiles; +        const size_t input_offset = swizzle.buffer_offset + buffer_offset; + +        const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); +        const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + +        const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); +        glUniform3uiv(0, 1, params.origin.data()); +        glUniform3iv(1, 1, params.destination.data()); +        glUniform1ui(2, params.bytes_per_block_log2); +        glUniform1ui(3, params.layer_stride); +        glUniform1ui(4, params.block_size); +        glUniform1ui(5, params.x_shift); +        glUniform1ui(6, params.block_height); +        glUniform1ui(7, params.block_height_mask); +        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), +                          input_offset, image.guest_size_bytes - swizzle.buffer_offset); +        
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, +                           GL_WRITE_ONLY, store_format); +        glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); +    } +    program_manager.RestoreGuestCompute(); +} + +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                                      std::span<const SwizzleParameters> swizzles) { +    static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; + +    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; +    static constexpr GLuint BINDING_INPUT_BUFFER = 1; +    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + +    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); +    program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); +    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + +    const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); +    for (const SwizzleParameters& swizzle : swizzles) { +        const Extent3D num_tiles = swizzle.num_tiles; +        const size_t input_offset = swizzle.buffer_offset + buffer_offset; + +        const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); +        const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); +        const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); + +        const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); +        glUniform3uiv(0, 1, params.origin.data()); +        glUniform3iv(1, 1, params.destination.data()); +        glUniform1ui(2, params.bytes_per_block_log2); +        glUniform1ui(3, params.slice_size); +        glUniform1ui(4, params.block_size); +        glUniform1ui(5, params.x_shift); +        glUniform1ui(6, params.block_height); +        glUniform1ui(7, 
params.block_height_mask); +        glUniform1ui(8, params.block_depth); +        glUniform1ui(9, params.block_depth_mask); +        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), +                          input_offset, image.guest_size_bytes - swizzle.buffer_offset); +        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, +                           GL_WRITE_ONLY, store_format); +        glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); +    } +    program_manager.RestoreGuestCompute(); +} + +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                              std::span<const SwizzleParameters> swizzles) { +    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; +    static constexpr GLuint BINDING_INPUT_BUFFER = 0; +    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; +    static constexpr GLuint LOC_ORIGIN = 0; +    static constexpr GLuint LOC_DESTINATION = 1; +    static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; +    static constexpr GLuint LOC_PITCH = 3; + +    const u32 bytes_per_block = BytesPerBlock(image.info.format); +    const GLenum format = StoreFormat(bytes_per_block); +    const u32 pitch = image.info.pitch; + +    UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), +                         "Non-power of two images are not implemented"); + +    program_manager.BindHostCompute(pitch_unswizzle_program.handle); +    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); +    glUniform2ui(LOC_ORIGIN, 0, 0); +    glUniform2i(LOC_DESTINATION, 0, 0); +    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); +    glUniform1ui(LOC_PITCH, pitch); +    glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); +    for (const SwizzleParameters& swizzle : swizzles) { +        const Extent3D num_tiles = swizzle.num_tiles; +        const 
size_t input_offset = swizzle.buffer_offset + buffer_offset; + +        const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); +        const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + +        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), +                          input_offset, image.guest_size_bytes - swizzle.buffer_offset); +        glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); +    } +    program_manager.RestoreGuestCompute(); +} + +void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) { +    static constexpr GLuint BINDING_INPUT_IMAGE = 0; +    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; +    static constexpr GLuint LOC_SRC_OFFSET = 0; +    static constexpr GLuint LOC_DST_OFFSET = 1; + +    program_manager.BindHostCompute(copy_bc4_program.handle); + +    for (const ImageCopy& copy : copies) { +        ASSERT(copy.src_subresource.base_layer == 0); +        ASSERT(copy.src_subresource.num_layers == 1); +        ASSERT(copy.dst_subresource.base_layer == 0); +        ASSERT(copy.dst_subresource.num_layers == 1); + +        glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); +        glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); +        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, +                           GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); +        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), +                           copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); +        glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); +    } +    program_manager.RestoreGuestCompute(); +} + +GLenum StoreFormat(u32 bytes_per_block) { +    switch (bytes_per_block) { +    case 1: +        return GL_R8UI; +    case 2: + 
       return GL_R16UI; +    case 4: +        return GL_R32UI; +    case 8: +        return GL_RG32UI; +    case 16: +        return GL_RGBA32UI; +    } +    UNREACHABLE(); +    return GL_R8UI; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h new file mode 100644 index 000000000..359997255 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <span> + +#include <glad/glad.h> + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +class Image; +class ImageBufferMap; +class ProgramManager; + +class UtilShaders { +public: +    explicit UtilShaders(ProgramManager& program_manager); +    ~UtilShaders(); + +    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                             std::span<const VideoCommon::SwizzleParameters> swizzles); + +    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                             std::span<const VideoCommon::SwizzleParameters> swizzles); + +    void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, +                     std::span<const VideoCommon::SwizzleParameters> swizzles); + +    void CopyBC4(Image& dst_image, Image& src_image, +                 std::span<const VideoCommon::ImageCopy> copies); + +private: +    ProgramManager& program_manager; + +    OGLBuffer swizzle_table_buffer; + +    OGLProgram block_linear_unswizzle_2d_program; +    OGLProgram block_linear_unswizzle_3d_program; +    OGLProgram pitch_unswizzle_program; +    OGLProgram copy_bc4_program; +}; + +GLenum StoreFormat(u32 bytes_per_block); + +} // namespace OpenGL diff 
--git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp deleted file mode 100644 index 6d7bb16b2..000000000 --- a/src/video_core/renderer_opengl/utils.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <string> -#include <vector> - -#include <fmt/format.h> -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { -    if (!GLAD_GL_KHR_debug) { -        // We don't need to throw an error as this is just for debugging -        return; -    } - -    std::string object_label; -    if (extra_info.empty()) { -        switch (identifier) { -        case GL_TEXTURE: -            object_label = fmt::format("Texture@0x{:016X}", addr); -            break; -        case GL_PROGRAM: -            object_label = fmt::format("Shader@0x{:016X}", addr); -            break; -        default: -            object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); -            break; -        } -    } else { -        object_label = fmt::format("{}@0x{:016X}", extra_info, addr); -    } -    glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h deleted file mode 100644 index 9c09ee12c..000000000 --- a/src/video_core/renderer_opengl/utils.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <string_view> -#include <vector> -#include <glad/glad.h> -#include "common/common_types.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp new file mode 100644 index 000000000..87c8e5693 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -0,0 +1,624 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> + +#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" +#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" +#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" +#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/renderer_vulkan/blit_image.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/surface.h" + +namespace Vulkan { + +using VideoCommon::ImageViewType; + +namespace { +struct PushConstants { +    std::array<float, 2> tex_scale; +    std::array<float, 2> tex_offset; +}; + +template <u32 binding> +inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{ +    .binding = binding, +    .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +    .descriptorCount = 1, +    .stageFlags = 
VK_SHADER_STAGE_FRAGMENT_BIT, +    .pImmutableSamplers = nullptr, +}; +constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ +    TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, +    TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>, +}; +constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .bindingCount = 1, +    .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, +}; +constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), +    .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), +}; +constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ +    .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +    .offset = 0, +    .size = sizeof(PushConstants), +}; +constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .vertexBindingDescriptionCount = 0, +    .pVertexBindingDescriptions = nullptr, +    .vertexAttributeDescriptionCount = 0, +    .pVertexAttributeDescriptions = nullptr, +}; +constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, +    .primitiveRestartEnable = VK_FALSE, +}; +constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .viewportCount = 1, +    .pViewports = nullptr, +    .scissorCount = 1, +   
 .pScissors = nullptr, +}; +constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .depthClampEnable = VK_FALSE, +    .rasterizerDiscardEnable = VK_FALSE, +    .polygonMode = VK_POLYGON_MODE_FILL, +    .cullMode = VK_CULL_MODE_BACK_BIT, +    .frontFace = VK_FRONT_FACE_CLOCKWISE, +    .depthBiasEnable = VK_FALSE, +    .depthBiasConstantFactor = 0.0f, +    .depthBiasClamp = 0.0f, +    .depthBiasSlopeFactor = 0.0f, +    .lineWidth = 1.0f, +}; +constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, +    .sampleShadingEnable = VK_FALSE, +    .minSampleShading = 0.0f, +    .pSampleMask = nullptr, +    .alphaToCoverageEnable = VK_FALSE, +    .alphaToOneEnable = VK_FALSE, +}; +constexpr std::array DYNAMIC_STATES{ +    VK_DYNAMIC_STATE_VIEWPORT, +    VK_DYNAMIC_STATE_SCISSOR, +}; +constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()), +    .pDynamicStates = DYNAMIC_STATES.data(), +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .logicOpEnable = VK_FALSE, +    .logicOp = VK_LOGIC_OP_CLEAR, +    .attachmentCount = 0, +    .pAttachments = nullptr, +    .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ +    .blendEnable = VK_FALSE, +    .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, +    
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, +    .colorBlendOp = VK_BLEND_OP_ADD, +    .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, +    .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, +    .alphaBlendOp = VK_BLEND_OP_ADD, +    .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | +                      VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .logicOpEnable = VK_FALSE, +    .logicOp = VK_LOGIC_OP_CLEAR, +    .attachmentCount = 1, +    .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, +    .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .depthTestEnable = VK_TRUE, +    .depthWriteEnable = VK_TRUE, +    .depthCompareOp = VK_COMPARE_OP_ALWAYS, +    .depthBoundsTestEnable = VK_FALSE, +    .stencilTestEnable = VK_FALSE, +    .front = VkStencilOpState{}, +    .back = VkStencilOpState{}, +    .minDepthBounds = 0.0f, +    .maxDepthBounds = 0.0f, +}; + +template <VkFilter filter> +inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ +    .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, +    .pNext = nullptr, +    .flags = 0, +    .magFilter = filter, +    .minFilter = filter, +    .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, +    .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, +    .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, +    .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, +    .mipLodBias = 0.0f, +    .anisotropyEnable = VK_FALSE, +    .maxAnisotropy = 0.0f, +    .compareEnable = VK_FALSE, +    .compareOp = VK_COMPARE_OP_NEVER, +    .minLod = 0.0f, +    .maxLod = 0.0f, +    
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, +    .unnormalizedCoordinates = VK_TRUE, +}; + +constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( +    const VkDescriptorSetLayout* set_layout) { +    return VkPipelineLayoutCreateInfo{ +        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .setLayoutCount = 1, +        .pSetLayouts = set_layout, +        .pushConstantRangeCount = 1, +        .pPushConstantRanges = &PUSH_CONSTANT_RANGE, +    }; +} + +constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage, +                                                                        VkShaderModule shader) { +    return VkPipelineShaderStageCreateInfo{ +        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .stage = stage, +        .module = shader, +        .pName = "main", +        .pSpecializationInfo = nullptr, +    }; +} + +constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages( +    VkShaderModule vertex_shader, VkShaderModule fragment_shader) { +    return std::array{ +        PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader), +        PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader), +    }; +} + +void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, +                                   VkSampler sampler, VkImageView image_view) { +    const VkDescriptorImageInfo image_info{ +        .sampler = sampler, +        .imageView = image_view, +        .imageLayout = VK_IMAGE_LAYOUT_GENERAL, +    }; +    const VkWriteDescriptorSet write_descriptor_set{ +        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +        .pNext = nullptr, +        .dstSet = descriptor_set, +        .dstBinding = 0, +        .dstArrayElement = 0, +        .descriptorCount = 1, +        
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +        .pImageInfo = &image_info, +        .pBufferInfo = nullptr, +        .pTexelBufferView = nullptr, +    }; +    device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); +} + +void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, +                                    VkSampler sampler, VkImageView image_view_0, +                                    VkImageView image_view_1) { +    const VkDescriptorImageInfo image_info_0{ +        .sampler = sampler, +        .imageView = image_view_0, +        .imageLayout = VK_IMAGE_LAYOUT_GENERAL, +    }; +    const VkDescriptorImageInfo image_info_1{ +        .sampler = sampler, +        .imageView = image_view_1, +        .imageLayout = VK_IMAGE_LAYOUT_GENERAL, +    }; +    const std::array write_descriptor_sets{ +        VkWriteDescriptorSet{ +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +            .pNext = nullptr, +            .dstSet = descriptor_set, +            .dstBinding = 0, +            .dstArrayElement = 0, +            .descriptorCount = 1, +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +            .pImageInfo = &image_info_0, +            .pBufferInfo = nullptr, +            .pTexelBufferView = nullptr, +        }, +        VkWriteDescriptorSet{ +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, +            .pNext = nullptr, +            .dstSet = descriptor_set, +            .dstBinding = 1, +            .dstArrayElement = 0, +            .descriptorCount = 1, +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +            .pImageInfo = &image_info_1, +            .pBufferInfo = nullptr, +            .pTexelBufferView = nullptr, +        }, +    }; +    device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, +              
     const std::array<Offset2D, 2>& dst_region, +                   const std::array<Offset2D, 2>& src_region) { +    const VkOffset2D offset{ +        .x = std::min(dst_region[0].x, dst_region[1].x), +        .y = std::min(dst_region[0].y, dst_region[1].y), +    }; +    const VkExtent2D extent{ +        .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)), +        .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)), +    }; +    const VkViewport viewport{ +        .x = static_cast<float>(offset.x), +        .y = static_cast<float>(offset.y), +        .width = static_cast<float>(extent.width), +        .height = static_cast<float>(extent.height), +        .minDepth = 0.0f, +        .maxDepth = 1.0f, +    }; +    // TODO: Support scissored blits +    const VkRect2D scissor{ +        .offset = offset, +        .extent = extent, +    }; +    const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); +    const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); +    const PushConstants push_constants{ +        .tex_scale = {scale_x, scale_y}, +        .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)}, +    }; +    cmdbuf.SetViewport(0, viewport); +    cmdbuf.SetScissor(0, scissor); +    cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +} + +} // Anonymous namespace + +BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, +                                 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) +    : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, +      one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( +          ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), +      two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( +          TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), +      
one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), +      two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), +      one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( +          PipelineLayoutCreateInfo(one_texture_set_layout.address()))), +      two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( +          PipelineLayoutCreateInfo(two_textures_set_layout.address()))), +      full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), +      blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), +      convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), +      convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), +      linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), +      nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { +    if (device.IsExtShaderStencilExportSupported()) { +        blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); +    } +} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, +                                const std::array<Offset2D, 2>& dst_region, +                                const std::array<Offset2D, 2>& src_region, +                                Tegra::Engines::Fermi2D::Filter filter, +                                Tegra::Engines::Fermi2D::Operation operation) { +    const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; +    const BlitImagePipelineKey key{ +        .renderpass = dst_framebuffer->RenderPass(), +        .operation = operation, +    }; +    const VkPipelineLayout layout = *one_texture_pipeline_layout; +    const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); +    const VkSampler 
sampler = is_linear ? *linear_sampler : *nearest_sampler; +    const VkPipeline pipeline = FindOrEmplacePipeline(key); +    const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); +    scheduler.RequestRenderpass(dst_framebuffer); +    scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, +                      &device = device](vk::CommandBuffer cmdbuf) { +        // TODO: Barriers +        UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); +        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, +                                  nullptr); +        BindBlitState(cmdbuf, layout, dst_region, src_region); +        cmdbuf.Draw(3, 1, 0, 0); +    }); +    scheduler.InvalidateState(); +} + +void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, +                                       VkImageView src_depth_view, VkImageView src_stencil_view, +                                       const std::array<Offset2D, 2>& dst_region, +                                       const std::array<Offset2D, 2>& src_region, +                                       Tegra::Engines::Fermi2D::Filter filter, +                                       Tegra::Engines::Fermi2D::Operation operation) { +    ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); +    ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); + +    const VkPipelineLayout layout = *two_textures_pipeline_layout; +    const VkSampler sampler = *nearest_sampler; +    const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); +    const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); +    scheduler.RequestRenderpass(dst_framebuffer); +    scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, +                      
src_stencil_view, descriptor_set, +                      &device = device](vk::CommandBuffer cmdbuf) { +        // TODO: Barriers +        UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, +                                       src_stencil_view); +        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, +                                  nullptr); +        BindBlitState(cmdbuf, layout, dst_region, src_region); +        cmdbuf.Draw(3, 1, 0, 0); +    }); +    scheduler.InvalidateState(); +} + +void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, +                                      const ImageView& src_image_view) { +    ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); +    Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, +                                      const ImageView& src_image_view) { + +    ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); +    Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, +                                      const ImageView& src_image_view) { +    ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); +    Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, +                                      const ImageView& src_image_view) { +    ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); +    Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::Convert(VkPipeline pipeline, const 
Framebuffer* dst_framebuffer, +                              const ImageView& src_image_view) { +    const VkPipelineLayout layout = *one_texture_pipeline_layout; +    const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); +    const VkSampler sampler = *nearest_sampler; +    const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); +    const VkExtent2D extent{ +        .width = src_image_view.size.width, +        .height = src_image_view.size.height, +    }; +    scheduler.RequestRenderpass(dst_framebuffer); +    scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, +                      &device = device](vk::CommandBuffer cmdbuf) { +        const VkOffset2D offset{ +            .x = 0, +            .y = 0, +        }; +        const VkViewport viewport{ +            .x = 0.0f, +            .y = 0.0f, +            .width = static_cast<float>(extent.width), +            .height = static_cast<float>(extent.height), +            .minDepth = 0.0f, +            .maxDepth = 0.0f, +        }; +        const VkRect2D scissor{ +            .offset = offset, +            .extent = extent, +        }; +        const PushConstants push_constants{ +            .tex_scale = {viewport.width, viewport.height}, +            .tex_offset = {0.0f, 0.0f}, +        }; +        UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + +        // TODO: Barriers +        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, +                                  nullptr); +        cmdbuf.SetViewport(0, viewport); +        cmdbuf.SetScissor(0, scissor); +        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +        cmdbuf.Draw(3, 1, 0, 0); +    }); +    scheduler.InvalidateState(); +} + +VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { +    
const auto it = std::ranges::find(blit_color_keys, key); +    if (it != blit_color_keys.end()) { +        return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; +    } +    blit_color_keys.push_back(key); + +    const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag); +    const VkPipelineColorBlendAttachmentState blend_attachment{ +        .blendEnable = VK_FALSE, +        .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, +        .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, +        .colorBlendOp = VK_BLEND_OP_ADD, +        .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, +        .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, +        .alphaBlendOp = VK_BLEND_OP_ADD, +        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | +                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +    }; +    // TODO: programmable blending +    const VkPipelineColorBlendStateCreateInfo color_blend_create_info{ +        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .logicOpEnable = VK_FALSE, +        .logicOp = VK_LOGIC_OP_CLEAR, +        .attachmentCount = 1, +        .pAttachments = &blend_attachment, +        .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +    }; +    blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ +        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .stageCount = static_cast<u32>(stages.size()), +        .pStages = stages.data(), +        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +        .pTessellationState = nullptr, +        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, +        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +        .pMultisampleState = 
&PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +        .pDepthStencilState = nullptr, +        .pColorBlendState = &color_blend_create_info, +        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, +        .layout = *one_texture_pipeline_layout, +        .renderPass = key.renderpass, +        .subpass = 0, +        .basePipelineHandle = VK_NULL_HANDLE, +        .basePipelineIndex = 0, +    })); +    return *blit_color_pipelines.back(); +} + +VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { +    if (blit_depth_stencil_pipeline) { +        return *blit_depth_stencil_pipeline; +    } +    const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); +    blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ +        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .stageCount = static_cast<u32>(stages.size()), +        .pStages = stages.data(), +        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +        .pTessellationState = nullptr, +        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, +        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +        .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, +        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, +        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, +        .layout = *two_textures_pipeline_layout, +        .renderPass = renderpass, +        .subpass = 0, +        .basePipelineHandle = VK_NULL_HANDLE, +        .basePipelineIndex = 0, +    }); +    return *blit_depth_stencil_pipeline; +} + +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { +    if (pipeline) { +        return; +    } + 
   const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); +    pipeline = device.GetLogical().CreateGraphicsPipeline({ +        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .stageCount = static_cast<u32>(stages.size()), +        .pStages = stages.data(), +        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +        .pTessellationState = nullptr, +        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, +        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +        .pDepthStencilState = nullptr, +        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, +        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, +        .layout = *one_texture_pipeline_layout, +        .renderPass = renderpass, +        .subpass = 0, +        .basePipelineHandle = VK_NULL_HANDLE, +        .basePipelineIndex = 0, +    }); +} + +void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { +    if (pipeline) { +        return; +    } +    const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); +    pipeline = device.GetLogical().CreateGraphicsPipeline({ +        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .stageCount = static_cast<u32>(stages.size()), +        .pStages = stages.data(), +        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +        .pTessellationState = nullptr, +        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, +        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +        .pMultisampleState 
= &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +        .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, +        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, +        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, +        .layout = *one_texture_pipeline_layout, +        .renderPass = renderpass, +        .subpass = 0, +        .basePipelineHandle = VK_NULL_HANDLE, +        .basePipelineIndex = 0, +    }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h new file mode 100644 index 000000000..2c2790bf9 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <compare> + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/texture_cache/types.h" + +namespace Vulkan { + +using VideoCommon::Offset2D; + +class VKDevice; +class VKScheduler; +class StateTracker; + +class Framebuffer; +class ImageView; + +struct BlitImagePipelineKey { +    constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; + +    VkRenderPass renderpass; +    Tegra::Engines::Fermi2D::Operation operation; +}; + +class BlitImageHelper { +public: +    explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, +                             StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); +    ~BlitImageHelper(); + +    void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, +                   const std::array<Offset2D, 2>& dst_region, +                   const std::array<Offset2D, 2>& src_region, +                   Tegra::Engines::Fermi2D::Filter filter, +                   
Tegra::Engines::Fermi2D::Operation operation); + +    void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, +                          VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region, +                          const std::array<Offset2D, 2>& src_region, +                          Tegra::Engines::Fermi2D::Filter filter, +                          Tegra::Engines::Fermi2D::Operation operation); + +    void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +    void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +    void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +    void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +private: +    void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, +                 const ImageView& src_image_view); + +    [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); + +    [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); + +    void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + +    void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + +    const VKDevice& device; +    VKScheduler& scheduler; +    StateTracker& state_tracker; + +    vk::DescriptorSetLayout one_texture_set_layout; +    vk::DescriptorSetLayout two_textures_set_layout; +    DescriptorAllocator one_texture_descriptor_allocator; +    DescriptorAllocator two_textures_descriptor_allocator; +    vk::PipelineLayout one_texture_pipeline_layout; +    vk::PipelineLayout two_textures_pipeline_layout; +    vk::ShaderModule full_screen_vert; +    vk::ShaderModule blit_color_to_color_frag; +    vk::ShaderModule blit_depth_stencil_frag; +    vk::ShaderModule convert_depth_to_float_frag; +    vk::ShaderModule convert_float_to_depth_frag; 
+    vk::Sampler linear_sampler; +    vk::Sampler nearest_sampler; + +    std::vector<BlitImagePipelineKey> blit_color_keys; +    std::vector<vk::Pipeline> blit_color_pipelines; +    vk::Pipeline blit_depth_stencil_pipeline; +    vk::Pipeline convert_d32_to_r32_pipeline; +    vk::Pipeline convert_r32_to_d32_pipeline; +    vk::Pipeline convert_d16_to_r16_pipeline; +    vk::Pipeline convert_r16_to_d16_pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5ec43db11..67dd10500 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta      logic_op.Assign(PackLogicOp(regs.logic_op.operation));      rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);      topology.Assign(regs.draw.topology); +    msaa_mode.Assign(regs.multisample_mode);      raw2 = 0;      const auto test_func = diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c26b77790..7e95e6fce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -186,6 +186,7 @@ struct FixedPipelineState {          BitField<19, 4, u32> logic_op;          BitField<23, 1, u32> rasterize_enable;          BitField<24, 4, Maxwell::PrimitiveTopology> topology; +        BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;      };      union {          u32 raw2; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 58e117eb3..40501e7fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -122,7 +122,7 @@ struct FormatTuple {      {VK_FORMAT_A8B8G8R8_SINT_PACK32, 
Attachable | Storage},     // A8B8G8R8_SINT      {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage},     // A8B8G8R8_UINT      {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable},                // R5G6B5_UNORM -    {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable},                // B5G6R5_UNORM +    {VK_FORMAT_B5G6R5_UNORM_PACK16},                            // B5G6R5_UNORM      {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},              // A1R5G5B5_UNORM      {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM      {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage},  // A2B10G10R10_UINT @@ -163,7 +163,7 @@ struct FormatTuple {      {VK_FORMAT_R16G16_UNORM, Attachable | Storage},            // R16G16_UNORM      {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage},           // R16G16_FLOAT      {VK_FORMAT_UNDEFINED},                                     // R16G16_UINT -    {VK_FORMAT_UNDEFINED},                                     // R16G16_SINT +    {VK_FORMAT_R16G16_SINT, Attachable | Storage},             // R16G16_SINT      {VK_FORMAT_R16G16_SNORM, Attachable | Storage},            // R16G16_SNORM      {VK_FORMAT_UNDEFINED},                                     // R32G32B32_FLOAT      {VK_FORMAT_R8G8B8A8_SRGB, Attachable},                     // A8B8G8R8_SRGB @@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo      // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively      if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { -        tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) -                           ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 -                           : VK_FORMAT_A8B8G8R8_UNORM_PACK32; +        const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format); +        tuple.format = is_srgb ? 
VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;      }      const bool attachable = tuple.usage & Attachable;      const bool storage = tuple.usage & Storage;      VkFormatFeatureFlags usage; -    if (format_type == FormatType::Buffer) { +    switch (format_type) { +    case FormatType::Buffer:          usage =              VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; -    } else { +        break; +    case FormatType::Linear: +    case FormatType::Optimal:          usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |                  VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;          if (attachable) { @@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo          if (storage) {              usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;          } +        break;      }      return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};  } @@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)      return {};  } +VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) { +    switch (reduction) { +    case Tegra::Texture::SamplerReduction::WeightedAverage: +        return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; +    case Tegra::Texture::SamplerReduction::Min: +        return VK_SAMPLER_REDUCTION_MODE_MIN_EXT; +    case Tegra::Texture::SamplerReduction::Max: +        return VK_SAMPLER_REDUCTION_MODE_MAX_EXT; +    } +    UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction)); +    return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; +} +  } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e213452f..1a90f192e 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ 
b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);  VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); +VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +  } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ea4b7c1e6..7f521cb9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() {      return library;  } -std::pair<vk::Instance, u32> CreateInstance( -    Common::DynamicLibrary& library, vk::InstanceDispatch& dld, -    WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { +std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library, +                                            vk::InstanceDispatch& dld, WindowSystemType window_type, +                                            bool enable_debug_utils, bool enable_layers) {      if (!library.IsOpen()) {          LOG_ERROR(Render_Vulkan, "Vulkan library not available");          return {}; @@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance(      if (window_type != Core::Frontend::WindowSystemType::Headless) {          extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);      } -    if (enable_layers) { +    if (enable_debug_utils) {          extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);      }      extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {  bool RendererVulkan::Init() {      library = OpenVulkanLibrary();      std::tie(instance, instance_version) = CreateInstance( -        library, dld, render_window.GetWindowInfo().type, 
Settings::values.renderer_debug); +        library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);      if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {          return false;      } @@ -447,7 +447,8 @@ void RendererVulkan::Report() const {  std::vector<std::string> RendererVulkan::EnumerateDevices() {      vk::InstanceDispatch dld;      Common::DynamicLibrary library = OpenVulkanLibrary(); -    vk::Instance instance = CreateInstance(library, dld).first; +    vk::Instance instance = +        CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;      if (!instance) {          return {};      } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 977b86003..74642fba4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -33,10 +33,9 @@ class VKDevice;  class VKMemoryManager;  class VKSwapchain;  class VKScheduler; -class VKImage;  struct VKScreenInfo { -    VKImage* image{}; +    VkImageView image_view{};      u32 width{};      u32 height{};      bool is_srgb{}; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b5b60309e..d3a83f22f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -16,12 +16,12 @@  #include "core/frontend/emu_window.h"  #include "core/memory.h"  #include "video_core/gpu.h" -#include "video_core/morton.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/renderer_vulkan/renderer_vulkan.h"  #include "video_core/renderer_vulkan/vk_blit_screen.h"  #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h"  #include 
"video_core/renderer_vulkan/vk_master_semaphore.h"  #include "video_core/renderer_vulkan/vk_memory_manager.h"  #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -29,108 +29,12 @@  #include "video_core/renderer_vulkan/vk_swapchain.h"  #include "video_core/renderer_vulkan/wrapper.h"  #include "video_core/surface.h" +#include "video_core/textures/decoders.h"  namespace Vulkan {  namespace { -// Generated from the "shaders/" directory, read the instructions there. -constexpr u8 blit_vertex_code[] = { -    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, -    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, -    0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, -    0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, -    0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, -    0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, -    0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, -    0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, -    0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, -    0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, -    0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 
0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, -    0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, -    0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, -    0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, -    0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, -    0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, -    0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, -    0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x50, 0x00, 0x07, 0x00, 
0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, -    0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, -    0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, -    0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, -    0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, -    0x38, 0x00, 0x01, 0x00}; - -constexpr u8 blit_fragment_code[] = { -    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, -    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, -    0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, -    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, -    0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x20, 
0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, -    0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, -    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, -    0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, -    0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, -    0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; -  struct ScreenRectVertex {      ScreenRectVertex() = default;      explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} @@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> 
MakeOrthographicMatrix(f32 width, f32 height) {      // clang-format on  } -std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { +u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {      using namespace VideoCore::Surface; -    return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); +    return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));  }  std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { @@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool      scheduler.Wait(resource_ticks[image_index]);      resource_ticks[image_index] = scheduler.CurrentTick(); -    VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); - -    UpdateDescriptorSet(image_index, blit_image->GetPresentView()); +    UpdateDescriptorSet(image_index, +                        use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);      BufferData data;      SetUniformData(data, framebuffer);      SetVertexData(data, framebuffer);      auto map = buffer_commit->Map(); -    std::memcpy(map.GetAddress(), &data, sizeof(data)); +    std::memcpy(map.Address(), &data, sizeof(data));      if (!use_accelerated) {          const u64 image_offset = GetRawImageOffset(framebuffer, image_index); -        const auto pixel_format = -            VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);          const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; -        const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); -        rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); +        const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); +        const size_t size_bytes = GetSizeInBytes(framebuffer); +        rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);          // TODO(Rodrigo): Read this 
from HLE          constexpr u32 block_height_log2 = 4; -        VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, -                                 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, -                                 map.GetAddress() + image_offset, host_ptr); - -        blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, -                               VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); +        const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); +        Tegra::Texture::UnswizzleTexture( +            std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), +            bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);          const VkBufferImageCopy copy{              .bufferOffset = image_offset, @@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool                  },          };          scheduler.Record( -            [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { -                cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); +            [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { +                const VkImageMemoryBarrier base_barrier{ +                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                    .pNext = nullptr, +                    .srcAccessMask = 0, +                    .dstAccessMask = 0, +                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                    .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                    .image = image, +                    .subresourceRange = +                        { +                            
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                            .baseMipLevel = 0, +                            .levelCount = 1, +                            .baseArrayLayer = 0, +                            .layerCount = 1, +                        }, +                }; +                VkImageMemoryBarrier read_barrier = base_barrier; +                read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; +                read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +                read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + +                VkImageMemoryBarrier write_barrier = base_barrier; +                write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; +                write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + +                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                                       0, read_barrier); +                cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); +                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, +                                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);              });      }      map.Release(); -    blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, -                           VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -      scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],                        descriptor_set = descriptor_sets[image_index], buffer = *buffer,                        size = swapchain.GetSize(), pipeline = *pipeline, @@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool          const VkClearValue clear_color{              .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},          }; - -        VkRenderPassBeginInfo renderpass_bi; -        renderpass_bi.sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; -        renderpass_bi.pNext = nullptr; -        renderpass_bi.renderPass = renderpass; -        renderpass_bi.framebuffer = framebuffer; -        renderpass_bi.renderArea.offset.x = 0; -        renderpass_bi.renderArea.offset.y = 0; -        renderpass_bi.renderArea.extent = size; -        renderpass_bi.clearValueCount = 1; -        renderpass_bi.pClearValues = &clear_color; - -        VkViewport viewport; -        viewport.x = 0.0f; -        viewport.y = 0.0f; -        viewport.width = static_cast<float>(size.width); -        viewport.height = static_cast<float>(size.height); -        viewport.minDepth = 0.0f; -        viewport.maxDepth = 1.0f; - -        VkRect2D scissor; -        scissor.offset.x = 0; -        scissor.offset.y = 0; -        scissor.extent = size; - +        const VkRenderPassBeginInfo renderpass_bi{ +            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, +            .pNext = nullptr, +            .renderPass = renderpass, +            .framebuffer = framebuffer, +            .renderArea = +                { +                    .offset = {0, 0}, +                    .extent = size, +                }, +            .clearValueCount = 1, +            .pClearValues = &clear_color, +        }; +        const VkViewport viewport{ +            .x = 0.0f, +            .y = 0.0f, +            .width = static_cast<float>(size.width), +            .height = static_cast<float>(size.height), +            .minDepth = 0.0f, +            .maxDepth = 1.0f, +        }; +        const VkRect2D scissor{ +            .offset = {0, 0}, +            .extent = size, +        };          cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);          cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);          cmdbuf.SetViewport(0, viewport); @@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)  }  void VKBlitScreen::CreateShaders() { -    
vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); -    fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); +    vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); +    fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);  }  void VKBlitScreen::CreateSemaphores() { @@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() {      const VkAttachmentReference color_attachment_ref{          .attachment = 0, -        .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, +        .layout = VK_IMAGE_LAYOUT_GENERAL,      };      const VkSubpassDescription subpass_description{ @@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff  void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {      raw_images.resize(image_count); +    raw_image_views.resize(image_count);      raw_buffer_commits.resize(image_count); -    const VkImageCreateInfo ci{ -        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .imageType = VK_IMAGE_TYPE_2D, -        .format = GetFormat(framebuffer), -        .extent = -            { -                .width = framebuffer.width, -                .height = framebuffer.height, -                .depth = 1, -            }, -        .mipLevels = 1, -        .arrayLayers = 1, -        .samples = VK_SAMPLE_COUNT_1_BIT, -        .tiling = VK_IMAGE_TILING_LINEAR, -        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, -        .sharingMode = VK_SHARING_MODE_EXCLUSIVE, -        .queueFamilyIndexCount = 0, -        .pQueueFamilyIndices = nullptr, -        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, -    }; - -    for (std::size_t i = 0; i < image_count; ++i) { -        raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); -        raw_buffer_commits[i] = 
memory_manager.Commit(raw_images[i]->GetHandle(), false); +    for (size_t i = 0; i < image_count; ++i) { +        raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ +            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, +            .pNext = nullptr, +            .flags = 0, +            .imageType = VK_IMAGE_TYPE_2D, +            .format = GetFormat(framebuffer), +            .extent = +                { +                    .width = framebuffer.width, +                    .height = framebuffer.height, +                    .depth = 1, +                }, +            .mipLevels = 1, +            .arrayLayers = 1, +            .samples = VK_SAMPLE_COUNT_1_BIT, +            .tiling = VK_IMAGE_TILING_LINEAR, +            .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, +            .sharingMode = VK_SHARING_MODE_EXCLUSIVE, +            .queueFamilyIndexCount = 0, +            .pQueueFamilyIndices = nullptr, +            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, +        }); +        raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); +        raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ +            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, +            .pNext = nullptr, +            .flags = 0, +            .image = *raw_images[i], +            .viewType = VK_IMAGE_VIEW_TYPE_2D, +            .format = GetFormat(framebuffer), +            .components = +                { +                    .r = VK_COMPONENT_SWIZZLE_IDENTITY, +                    .g = VK_COMPONENT_SWIZZLE_IDENTITY, +                    .b = VK_COMPONENT_SWIZZLE_IDENTITY, +                    .a = VK_COMPONENT_SWIZZLE_IDENTITY, +                }, +            .subresourceRange = +                { +                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +                    .baseMipLevel = 0, +                    .levelCount = 1, +                    .baseArrayLayer = 0, +                   
 .layerCount = 1, +                }, +        });      }  } @@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag      const VkDescriptorImageInfo image_info{          .sampler = *sampler,          .imageView = image_view, -        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, +        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,      };      const VkWriteDescriptorSet sampler_write{ diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 8f2839214..2ee374247 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -35,7 +35,6 @@ struct ScreenInfo;  class RasterizerVulkan;  class VKDevice; -class VKImage;  class VKScheduler;  class VKSwapchain; @@ -110,7 +109,8 @@ private:      std::vector<u64> resource_ticks;      std::vector<vk::Semaphore> semaphores; -    std::vector<std::unique_ptr<VKImage>> raw_images; +    std::vector<vk::Image> raw_images; +    std::vector<vk::ImageView> raw_image_views;      std::vector<VKMemoryCommit> raw_buffer_commits;      u32 raw_width = 0;      u32 raw_height = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 444d3fb93..10d296c2f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =      VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |      VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; +constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = +    VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; +  std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { -    return std::make_unique<VKStreamBuffer>(device, 
scheduler, BUFFER_USAGE); +    return std::make_unique<VKStreamBuffer>(device, scheduler);  }  } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, +Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,                 VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) -    : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { +    : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ +                                                                                 staging_pool_} {      const VkBufferCreateInfo ci{          .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,          .pNext = nullptr, @@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {      scheduler.RequestOutsideRenderPassOperationContext();      const VkBuffer handle = Handle(); -    scheduler.Record( -        [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { -            cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); - -            const VkBufferMemoryBarrier barrier{ -                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, -                .pNext = nullptr, -                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, -                .dstAccessMask = UPLOAD_ACCESS_BARRIERS, -                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, -                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, -                .buffer = handle, -                .offset = offset, -                .size = data_size, -            }; -            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, -                                   barrier, {}); -        }); +    scheduler.Record([staging = *staging.handle, handle, offset, data_size, +                      &device 
= device](vk::CommandBuffer cmdbuf) { +        const VkBufferMemoryBarrier read_barrier{ +            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = +                VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | +                VK_ACCESS_HOST_WRITE_BIT | +                (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), +            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, +            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .buffer = handle, +            .offset = offset, +            .size = data_size, +        }; +        const VkBufferMemoryBarrier write_barrier{ +            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +            .dstAccessMask = UPLOAD_ACCESS_BARRIERS, +            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .buffer = handle, +            .offset = offset, +            .size = data_size, +        }; +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, read_barrier); +        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, +                               write_barrier); +    });  }  void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { @@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst  VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,                               const VKDevice& device_, 
VKMemoryManager& memory_manager_, -                             VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) -    : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, +                             VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, +                             VKStagingBufferPool& staging_pool_) +    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, +                                                                 cpu_memory_, stream_buffer_},        device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{                                                                                     staging_pool_} {} diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 6008b8373..daf498222 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -41,6 +41,7 @@ public:      }  private: +    const VKDevice& device;      VKScheduler& scheduler;      VKStagingBufferPool& staging_pool; @@ -49,10 +50,11 @@ private:  class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {  public: -    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, -                           Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, -                           const VKDevice& device_, VKMemoryManager& memory_manager_, -                           VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); +    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, +                           Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, +                           const VKDevice& device, VKMemoryManager& memory_manager, +                           VKScheduler& scheduler, VKStreamBuffer& stream_buffer, +                    
       VKStagingBufferPool& staging_pool);      ~VKBufferCache();      BufferInfo GetEmptyBuffer(std::size_t size) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1ac7e2a30..2c030e910 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,6 +10,9 @@  #include "common/alignment.h"  #include "common/assert.h"  #include "common/common_types.h" +#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" +#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" +#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"  #include "video_core/renderer_vulkan/vk_compute_pass.h"  #include "video_core/renderer_vulkan/vk_descriptor_pool.h"  #include "video_core/renderer_vulkan/vk_device.h" @@ -22,99 +25,6 @@ namespace Vulkan {  namespace { -// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there. 
-constexpr u8 quad_array[] = { -    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, -    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, -    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, -    0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 
0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, -    0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, -    0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, -    0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, -    0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, -    0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, -    0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x04, 
0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, -    0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, -    0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, -    0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, -    0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, -    0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, -    0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, -    0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, -    0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, -  
  0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, -    0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, -    0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, -    0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, -    0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, -    0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, -    0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, -    0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, -    0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, -    0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, -    0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, -    0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 
0x00, 0x00, 0x00, -    0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, -    0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, -    0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, -    0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, -    0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, -    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; -  VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {      return {          .binding = 0, @@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {      };  } -// Uint8 SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 uint8_pass[] = { -    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, -    0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, -    0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, -    0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, -    0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, -    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, -    0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, -    0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, -    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, -    0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 
0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, -    0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, -    0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, -    0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, -    0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, -    0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, -    0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, -    0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, -    0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 
0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, -    0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, -    0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, -    0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, -    0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, -    0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, -    0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, -  
  0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, -    0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, -    0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, -    0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, -    0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, -    0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, -    0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, -    0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, -    0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, -    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - -// Quad indexed SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 QUAD_INDEXED_SPV[] = { -    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, -    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, -    0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, -    0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -    0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 
0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, -    0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, -    0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, -    0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, -    0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x21, 0x00, 
0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, -    0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, -    0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, -    0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, -    0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, -    0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, -    0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, -    0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 
0x00, -    0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, -    0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, -    0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, -    0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, -    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, -    0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, -    0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, -    0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, -    0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, -    0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, -    0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, -    0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 
0x00, 0xf9, 0x00, 0x02, 0x00, -    0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, -    0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, -    0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, -    0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, -    0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, -    0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, -    0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, -    0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, -    0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, -    0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -    0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, -    0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, -    0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, -    0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, -    0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 
0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, -    0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, -    0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, -    0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, -    0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, -    0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, -    0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, -    0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, -    0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, -    0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, -    0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, -    0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, -    0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, -    0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, -    0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, -    0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, -    0xf9, 0x00, 0x02, 
0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, -    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; -  std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {      return {{          { @@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {  VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,                               vk::Span<VkDescriptorSetLayoutBinding> bindings,                               vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, -                             vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, -                             const u8* code) { +                             vk::Span<VkPushConstantRange> push_constants, +                             std::span<const u32> code) {      descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,          .pNext = nullptr, @@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto          .bindingCount = bindings.size(),          .pBindings = bindings.data(),      }); -      layout = device.GetLogical().CreatePipelineLayout({          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,          .pNext = nullptr, @@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto          .pushConstantRangeCount = push_constants.size(),          .pPushConstantRanges = push_constants.data(),      }); -      if (!templates.empty()) {          descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({              .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, @@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto          descriptor_allocator.emplace(descriptor_pool, 
*descriptor_set_layout);      } - -    auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); -    std::memcpy(code_copy.get(), code, code_size); -      module = device.GetLogical().CreateShaderModule({          .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,          .pNext = nullptr,          .flags = 0, -        .codeSize = code_size, -        .pCode = code_copy.get(), +        .codeSize = static_cast<u32>(code.size_bytes()), +        .pCode = code.data(),      }); -      pipeline = device.GetLogical().CreateComputePipeline({          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,          .pNext = nullptr, @@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,                               VKUpdateDescriptorQueue& update_descriptor_queue_)      : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),                      BuildQuadArrayPassDescriptorUpdateTemplateEntry(), -                    BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), +                    BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),        scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},        update_descriptor_queue{update_descriptor_queue_} {} @@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32      return {*buffer.handle, 0};  } -Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, -                     VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_, +                     VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,                       VKUpdateDescriptorQueue& update_descriptor_queue_) -    : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), -                    
BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), -                    uint8_pass), +    : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), +                    BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),        scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},        update_descriptor_queue{update_descriptor_queue_} {} @@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler                                   VKUpdateDescriptorQueue& update_descriptor_queue_)      : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),                      BuildInputOutputDescriptorUpdateTemplate(), -                    BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), -                    QUAD_INDEXED_SPV), +                    BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),        scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},        update_descriptor_queue{update_descriptor_queue_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 2dc87902c..abdf61e2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -5,6 +5,7 @@  #pragma once  #include <optional> +#include <span>  #include <utility>  #include "common/common_types.h" @@ -24,8 +25,7 @@ public:      explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,                             vk::Span<VkDescriptorSetLayoutBinding> bindings,                             vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, -                           vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, -                           const u8* code); +                           vk::Span<VkPushConstantRange> push_constants, 
std::span<const u32> code);      ~VKComputePass();  protected: diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index ce3846195..370a63f74 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{      VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,      VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,      VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, +    VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,      VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,      VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,      VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(          VK_FORMAT_R16G16_UNORM,          VK_FORMAT_R16G16_SNORM,          VK_FORMAT_R16G16_SFLOAT, +        VK_FORMAT_R16G16_SINT,          VK_FORMAT_R16_UNORM,          VK_FORMAT_R16_UINT,          VK_FORMAT_R8G8B8A8_SRGB, @@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(          VK_FORMAT_BC2_SRGB_BLOCK,          VK_FORMAT_BC3_SRGB_BLOCK,          VK_FORMAT_BC7_SRGB_BLOCK, +        VK_FORMAT_ASTC_4x4_UNORM_BLOCK,          VK_FORMAT_ASTC_4x4_SRGB_BLOCK, -        VK_FORMAT_ASTC_8x8_SRGB_BLOCK, -        VK_FORMAT_ASTC_8x5_SRGB_BLOCK, +        VK_FORMAT_ASTC_5x4_UNORM_BLOCK,          VK_FORMAT_ASTC_5x4_SRGB_BLOCK,          VK_FORMAT_ASTC_5x5_UNORM_BLOCK,          VK_FORMAT_ASTC_5x5_SRGB_BLOCK, -        VK_FORMAT_ASTC_10x8_UNORM_BLOCK, -        VK_FORMAT_ASTC_10x8_SRGB_BLOCK, +        VK_FORMAT_ASTC_6x5_UNORM_BLOCK, +        VK_FORMAT_ASTC_6x5_SRGB_BLOCK,          VK_FORMAT_ASTC_6x6_UNORM_BLOCK,          VK_FORMAT_ASTC_6x6_SRGB_BLOCK, +        VK_FORMAT_ASTC_8x5_UNORM_BLOCK, +        VK_FORMAT_ASTC_8x5_SRGB_BLOCK, +        VK_FORMAT_ASTC_8x6_UNORM_BLOCK, +        VK_FORMAT_ASTC_8x6_SRGB_BLOCK, +        
VK_FORMAT_ASTC_8x8_UNORM_BLOCK, +        VK_FORMAT_ASTC_8x8_SRGB_BLOCK, +        VK_FORMAT_ASTC_10x5_UNORM_BLOCK, +        VK_FORMAT_ASTC_10x5_SRGB_BLOCK, +        VK_FORMAT_ASTC_10x6_UNORM_BLOCK, +        VK_FORMAT_ASTC_10x6_SRGB_BLOCK, +        VK_FORMAT_ASTC_10x8_UNORM_BLOCK, +        VK_FORMAT_ASTC_10x8_SRGB_BLOCK,          VK_FORMAT_ASTC_10x10_UNORM_BLOCK,          VK_FORMAT_ASTC_10x10_SRGB_BLOCK, +        VK_FORMAT_ASTC_12x10_UNORM_BLOCK, +        VK_FORMAT_ASTC_12x10_SRGB_BLOCK,          VK_FORMAT_ASTC_12x12_UNORM_BLOCK,          VK_FORMAT_ASTC_12x12_SRGB_BLOCK,          VK_FORMAT_ASTC_8x6_UNORM_BLOCK, @@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(  VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,                     VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) -    : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, +    : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},        instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {      SetupFamilies(surface);      SetupFeatures(); @@ -214,7 +230,7 @@ bool VKDevice::Create() {      features2.features = {          .robustBufferAccess = false,          .fullDrawIndexUint32 = false, -        .imageCubeArray = false, +        .imageCubeArray = true,          .independentBlend = true,          .geometryShader = true,          .tessellationShader = true, @@ -242,7 +258,7 @@ bool VKDevice::Create() {          .shaderTessellationAndGeometryPointSize = false,          .shaderImageGatherExtended = true,          .shaderStorageImageExtendedFormats = false, -        .shaderStorageImageMultisample = false, +        .shaderStorageImageMultisample = true,          .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,          .shaderStorageImageWriteWithoutFormat = true,          
.shaderUniformBufferArrayDynamicIndexing = false, @@ -268,7 +284,6 @@ bool VKDevice::Create() {          .variableMultisampleRate = false,          .inheritedQueries = false,      }; -      VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,          .pNext = nullptr, @@ -380,6 +395,20 @@ bool VKDevice::Create() {          LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");      } +    VkPhysicalDeviceRobustness2FeaturesEXT robustness2; +    if (ext_robustness2) { +        robustness2 = { +            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, +            .pNext = nullptr, +            .robustBufferAccess2 = false, +            .robustImageAccess2 = true, +            .nullDescriptor = true, +        }; +        SetNext(next, robustness2); +    } else { +        LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); +    } +      if (!ext_depth_range_unrestricted) {          LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");      } @@ -405,7 +434,14 @@ bool VKDevice::Create() {      }      CollectTelemetryParameters(); +    CollectToolingInfo(); +    if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { +        LOG_WARNING( +            Render_Vulkan, +            "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); +        ext_extended_dynamic_state = false; +    }      if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {          // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it          // seems to cause stability issues @@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const {      LOG_CRITICAL(Render_Vulkan, "Device loss occured!");      // Wait for the log to flush and for Nsight Aftermath to dump the results -    std::this_thread::sleep_for(std::chrono::seconds{3}); 
+    std::this_thread::sleep_for(std::chrono::seconds{15});  }  void VKDevice::SaveShader(const std::vector<u32>& spirv) const { @@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)      return true;  } +bool VKDevice::TestDepthStencilBlits() const { +    static constexpr VkFormatFeatureFlags required_features = +        VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; +    const auto test_features = [](VkFormatProperties props) { +        return (props.optimalTilingFeatures & required_features) == required_features; +    }; +    return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && +           test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); +} +  bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,                                   FormatType format_type) const {      const auto it = format_properties.find(wanted_format); @@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {      const auto features{physical.GetFeatures()};      const std::array feature_report = {          std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), +        std::make_pair(features.imageCubeArray, "imageCubeArray"),          std::make_pair(features.independentBlend, "independentBlend"),          std::make_pair(features.depthClamp, "depthClamp"),          std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), @@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {          std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),          std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),          std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), +        std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),  
        std::make_pair(features.shaderStorageImageWriteWithoutFormat,                         "shaderStorageImageWriteWithoutFormat"),      }; @@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {      bool has_ext_transform_feedback{};      bool has_ext_custom_border_color{};      bool has_ext_extended_dynamic_state{}; +    bool has_ext_robustness2{};      for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {          const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,                                bool push) { @@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() {          test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);          test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);          test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); +        test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);          test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,               true); +        test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); +        test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);          test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);          test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);          test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); +        test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);          if (instance_version >= VK_API_VERSION_1_1) {              test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);          } @@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() {          }      } +    if 
(has_ext_robustness2) { +        VkPhysicalDeviceRobustness2FeaturesEXT robustness2; +        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; +        robustness2.pNext = nullptr; +        features.pNext = &robustness2; +        physical.GetFeatures2KHR(features); +        if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { +            extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); +            ext_robustness2 = true; +        } +    } +      return extensions;  } @@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) {  void VKDevice::SetupFeatures() {      const auto supported_features{physical.GetFeatures()};      is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; +    is_blit_depth_stencil_supported = TestDepthStencilBlits();      is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);  } @@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() {      }  } +void VKDevice::CollectToolingInfo() { +    if (!ext_tooling_info) { +        return; +    } +    const auto vkGetPhysicalDeviceToolPropertiesEXT = +        reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>( +            dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); +    if (!vkGetPhysicalDeviceToolPropertiesEXT) { +        return; +    } +    u32 tool_count = 0; +    if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { +        return; +    } +    std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count); +    if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { +        return; +    } +    for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { +        const std::string_view name = tool.name; +        LOG_INFO(Render_Vulkan, "{}", name); +        has_renderdoc = has_renderdoc || name == "RenderDoc"; +        has_nsight_graphics = 
has_nsight_graphics || name == "NVIDIA Nsight Graphics"; +    } +} +  std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {      static constexpr float QUEUE_PRIORITY = 1.0f; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 4286673d9..995dcfc0f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -157,6 +157,11 @@ public:          return is_formatless_image_load_supported;      } +    /// Returns true when blitting from and to depth stencil images is supported. +    bool IsBlitDepthStencilSupported() const { +        return is_blit_depth_stencil_supported; +    } +      /// Returns true if the device supports VK_NV_viewport_swizzle.      bool IsNvViewportSwizzleSupported() const {          return nv_viewport_swizzle; @@ -172,6 +177,11 @@ public:          return ext_index_type_uint8;      } +    /// Returns true if the device supports VK_EXT_sampler_filter_minmax. +    bool IsExtSamplerFilterMinmaxSupported() const { +        return ext_sampler_filter_minmax; +    } +      /// Returns true if the device supports VK_EXT_depth_range_unrestricted.      bool IsExtDepthRangeUnrestrictedSupported() const {          return ext_depth_range_unrestricted; @@ -197,6 +207,16 @@ public:          return ext_extended_dynamic_state;      } +    /// Returns true if the device supports VK_EXT_shader_stencil_export. +    bool IsExtShaderStencilExportSupported() const { +        return ext_shader_stencil_export; +    } + +    /// Returns true when a known debugging tool is attached. +    bool HasDebuggingToolAttached() const { +        return has_renderdoc || has_nsight_graphics; +    } +      /// Returns the vendor name reported from Vulkan.      std::string_view GetVendorName() const {          return vendor_name; @@ -228,16 +248,23 @@ private:      /// Collects telemetry information from the device.      
void CollectTelemetryParameters(); +    /// Collects information about attached tools. +    void CollectToolingInfo(); +      /// Returns a list of queue initialization descriptors.      std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;      /// Returns true if ASTC textures are natively supported.      bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; +    /// Returns true if the device natively supports blitting depth stencil images. +    bool TestDepthStencilBlits() const; +      /// Returns true if a format is supported.      bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,                             FormatType format_type) const; +    VkInstance instance;                    ///< Vulkan instance.      vk::DeviceDispatch dld;                 ///< Device function pointers.      vk::PhysicalDevice physical;            ///< Physical device.      VkPhysicalDeviceProperties properties;  ///< Device properties. @@ -253,15 +280,22 @@ private:      bool is_float16_supported{};            ///< Support for float16 arithmetics.      bool is_warp_potentially_bigger{};      ///< Host warp size can be bigger than guest.      bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. +    bool is_blit_depth_stencil_supported{};    ///< Support for blitting from and to depth stencil.      bool nv_viewport_swizzle{};                ///< Support for VK_NV_viewport_swizzle.      bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.      bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8. +    bool ext_sampler_filter_minmax{};          ///< Support for VK_EXT_sampler_filter_minmax.      bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.      bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer. 
+    bool ext_tooling_info{};                   ///< Support for VK_EXT_tooling_info.      bool ext_transform_feedback{};             ///< Support for VK_EXT_transform_feedback.      bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.      bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state. +    bool ext_robustness2{};                    ///< Support for VK_EXT_robustness2. +    bool ext_shader_stencil_export{};          ///< Support for VK_EXT_shader_stencil_export.      bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config. +    bool has_renderdoc{};                      ///< Has RenderDoc attached +    bool has_nsight_graphics{};                ///< Has Nsight Graphics attached      // Asynchronous Graphics Pipeline setting      bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0bcaee714..774a12a53 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const {  }  VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, -                               Tegra::MemoryManager& memory_manager_, -                               VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, -                               VKQueryCache& query_cache_, const VKDevice& device_, -                               VKScheduler& scheduler_) +                               Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, +                               VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, +                               const VKDevice& device_, VKScheduler& scheduler_)      : GenericFenceManager{rasterizer_, 
gpu_, texture_cache_, buffer_cache_, query_cache_},        device{device_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index c8547cc24..c2869e8e3 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -8,6 +8,7 @@  #include "video_core/fence_manager.h"  #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h"  #include "video_core/renderer_vulkan/wrapper.h"  namespace Core { @@ -24,7 +25,6 @@ class VKBufferCache;  class VKDevice;  class VKQueryCache;  class VKScheduler; -class VKTextureCache;  class InnerFence : public VideoCommon::FenceBase {  public: @@ -51,12 +51,12 @@ private:  using Fence = std::shared_ptr<InnerFence>;  using GenericFenceManager = -    VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; +    VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;  class VKFenceManager final : public GenericFenceManager {  public:      explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, -                            Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, +                            Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,                              VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,                              const VKDevice& device_, VKScheduler& scheduler_); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 970979fa1..7979df3a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -15,7 +15,6 @@  #include "video_core/renderer_vulkan/vk_device.h"  #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"  #include 
"video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h"  #include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/vk_update_descriptor.h"  #include "video_core/renderer_vulkan/wrapper.h" @@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {      };  } +VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { +    switch (msaa_mode) { +    case Tegra::Texture::MsaaMode::Msaa1x1: +        return VK_SAMPLE_COUNT_1_BIT; +    case Tegra::Texture::MsaaMode::Msaa2x1: +    case Tegra::Texture::MsaaMode::Msaa2x1_D3D: +        return VK_SAMPLE_COUNT_2_BIT; +    case Tegra::Texture::MsaaMode::Msaa2x2: +    case Tegra::Texture::MsaaMode::Msaa2x2_VC4: +    case Tegra::Texture::MsaaMode::Msaa2x2_VC12: +        return VK_SAMPLE_COUNT_4_BIT; +    case Tegra::Texture::MsaaMode::Msaa4x2: +    case Tegra::Texture::MsaaMode::Msaa4x2_D3D: +    case Tegra::Texture::MsaaMode::Msaa4x2_VC8: +    case Tegra::Texture::MsaaMode::Msaa4x2_VC24: +        return VK_SAMPLE_COUNT_8_BIT; +    case Tegra::Texture::MsaaMode::Msaa4x4: +        return VK_SAMPLE_COUNT_16_BIT; +    default: +        UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); +        return VK_SAMPLE_COUNT_1_BIT; +    } +} +  } // Anonymous namespace  VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,                                         VKDescriptorPool& descriptor_pool_,                                         VKUpdateDescriptorQueue& update_descriptor_queue_, -                                       VKRenderPassCache& renderpass_cache_, -                                       const GraphicsPipelineCacheKey& key_, -                                       vk::Span<VkDescriptorSetLayoutBinding> bindings_, -                                       const SPIRVProgram& program_) -    : device{device_}, scheduler{scheduler_}, cache_key{key_}, 
hash{cache_key.Hash()}, -      descriptor_set_layout{CreateDescriptorSetLayout(bindings_)}, +                                       const GraphicsPipelineCacheKey& key, +                                       vk::Span<VkDescriptorSetLayoutBinding> bindings, +                                       const SPIRVProgram& program, u32 num_color_buffers) +    : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, +      descriptor_set_layout{CreateDescriptorSetLayout(bindings)},        descriptor_allocator{descriptor_pool_, *descriptor_set_layout},        update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, -      descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( -                                                                         program_)}, -      renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, -      pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {} +      descriptor_template{CreateDescriptorUpdateTemplate(program)}, +      modules(CreateShaderModules(program)), +      pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}  VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(      return shader_modules;  } -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, -                                                const SPIRVProgram& program) const { +vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, +                                                VkRenderPass renderpass, +                                                u32 num_color_buffers) const {      const auto& state = cache_key.fixed_state;      const auto& viewport_swizzles = state.viewport_swizzles; @@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& 
renderpa      };      std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; -    std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), -                   UnpackViewportSwizzle); +    std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);      VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,          .pNext = nullptr, @@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,          .pNext = nullptr,          .flags = 0, -        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, +        .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),          .sampleShadingEnable = VK_FALSE,          .minSampleShading = 0.0f,          .pSampleMask = nullptr, @@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa      };      std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; -    const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); -    for (std::size_t index = 0; index < num_attachments; ++index) { +    for (std::size_t index = 0; index < num_color_buffers; ++index) {          static constexpr std::array COMPONENT_TABLE{              VK_COLOR_COMPONENT_R_BIT,              VK_COLOR_COMPONENT_G_BIT, @@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa          .flags = 0,          .logicOpEnable = VK_FALSE,          .logicOp = VK_LOGIC_OP_COPY, -        .attachmentCount = static_cast<u32>(num_attachments), +        .attachmentCount = num_color_buffers,          .pAttachments = cb_attachments.data(),          .blendConstants = {},      }; @@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const 
RenderPassParams& renderpa              stage_ci.pNext = &subgroup_size_ci;          }      } - -    const VkGraphicsPipelineCreateInfo ci{ +    return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,          .pNext = nullptr,          .flags = 0, @@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa          .subpass = 0,          .basePipelineHandle = nullptr,          .basePipelineIndex = 0, -    }; -    return device.GetLogical().CreateGraphicsPipeline(ci); +    });  }  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3fb31d55a..214d06b4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -8,10 +8,10 @@  #include <optional>  #include <vector> +#include "common/common_types.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_vulkan/fixed_pipeline_state.h"  #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h"  #include "video_core/renderer_vulkan/vk_shader_decompiler.h"  #include "video_core/renderer_vulkan/wrapper.h" @@ -20,8 +20,7 @@ namespace Vulkan {  using Maxwell = Tegra::Engines::Maxwell3D::Regs;  struct GraphicsPipelineCacheKey { -    RenderPassParams renderpass_params; -    u32 padding; +    VkRenderPass renderpass;      std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;      FixedPipelineState fixed_state; @@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey {      }      std::size_t Size() const noexcept { -        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); +        return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();      }  };  
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); @@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);  class VKDescriptorPool;  class VKDevice; -class VKRenderPassCache;  class VKScheduler;  class VKUpdateDescriptorQueue; @@ -52,12 +50,11 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt  class VKGraphicsPipeline final {  public:      explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, -                                VKDescriptorPool& descriptor_pool_, +                                VKDescriptorPool& descriptor_pool,                                  VKUpdateDescriptorQueue& update_descriptor_queue_, -                                VKRenderPassCache& renderpass_cache_, -                                const GraphicsPipelineCacheKey& key_, -                                vk::Span<VkDescriptorSetLayoutBinding> bindings_, -                                const SPIRVProgram& program_); +                                const GraphicsPipelineCacheKey& key, +                                vk::Span<VkDescriptorSetLayoutBinding> bindings, +                                const SPIRVProgram& program, u32 num_color_buffers);      ~VKGraphicsPipeline();      VkDescriptorSet CommitDescriptorSet(); @@ -70,10 +67,6 @@ public:          return *layout;      } -    VkRenderPass GetRenderPass() const { -        return renderpass; -    } -      GraphicsPipelineCacheKey GetCacheKey() const {          return cache_key;      } @@ -89,8 +82,8 @@ private:      std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; -    vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, -                                const SPIRVProgram& program) const; +    vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, +                                u32 num_color_buffers) const;      const 
VKDevice& device;      VKScheduler& scheduler; @@ -104,7 +97,6 @@ private:      vk::DescriptorUpdateTemplateKHR descriptor_template;      std::vector<vk::ShaderModule> modules; -    VkRenderPass renderpass;      vk::Pipeline pipeline;  }; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp deleted file mode 100644 index 072d14e3b..000000000 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <memory> -#include <vector> - -#include "common/assert.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_, -                 const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_) -    : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_}, -      image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} { -    UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0, -                         "Queue family tracking is not implemented"); - -    image = device_.GetLogical().CreateImage(image_ci_); - -    const u32 num_ranges = image_num_layers * image_num_levels; -    barriers.resize(num_ranges); -    subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout}); -} - -VKImage::~VKImage() = default; - -void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, -                         VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, -                         VkImageLayout new_layout) { -    if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { -        
return; -    } - -    std::size_t cursor = 0; -    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { -        for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { -            const u32 layer = base_layer + layer_it; -            const u32 level = base_level + level_it; -            auto& state = GetSubrangeState(layer, level); -            auto& barrier = barriers[cursor]; -            barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; -            barrier.pNext = nullptr; -            barrier.srcAccessMask = state.access; -            barrier.dstAccessMask = new_access; -            barrier.oldLayout = state.layout; -            barrier.newLayout = new_layout; -            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; -            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; -            barrier.image = *image; -            barrier.subresourceRange.aspectMask = aspect_mask; -            barrier.subresourceRange.baseMipLevel = level; -            barrier.subresourceRange.levelCount = 1; -            barrier.subresourceRange.baseArrayLayer = layer; -            barrier.subresourceRange.layerCount = 1; -            state.access = new_access; -            state.layout = new_layout; -        } -    } - -    scheduler.RequestOutsideRenderPassOperationContext(); - -    scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { -        // TODO(Rodrigo): Implement a way to use the latest stage across subresources. 
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, -                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, -                               vk::Span(barriers.data(), cursor)); -    }); -} - -bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, -                         VkAccessFlags new_access, VkImageLayout new_layout) noexcept { -    const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && -                               base_level == 0 && num_levels == image_num_levels; -    if (!is_full_range) { -        state_diverged = true; -    } - -    if (!state_diverged) { -        auto& state = GetSubrangeState(0, 0); -        if (state.access != new_access || state.layout != new_layout) { -            return true; -        } -    } - -    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { -        for (u32 level_it = 0; level_it < num_levels; ++level_it) { -            const u32 layer = base_layer + layer_it; -            const u32 level = base_level + level_it; -            auto& state = GetSubrangeState(layer, level); -            if (state.access != new_access || state.layout != new_layout) { -                return true; -            } -        } -    } -    return false; -} - -void VKImage::CreatePresentView() { -    // Image type has to be 2D to be presented. 
-    present_view = device.GetLogical().CreateImageView({ -        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .image = *image, -        .viewType = VK_IMAGE_VIEW_TYPE_2D, -        .format = format, -        .components = -            { -                .r = VK_COMPONENT_SWIZZLE_IDENTITY, -                .g = VK_COMPONENT_SWIZZLE_IDENTITY, -                .b = VK_COMPONENT_SWIZZLE_IDENTITY, -                .a = VK_COMPONENT_SWIZZLE_IDENTITY, -            }, -        .subresourceRange = -            { -                .aspectMask = aspect_mask, -                .baseMipLevel = 0, -                .levelCount = 1, -                .baseArrayLayer = 0, -                .layerCount = 1, -            }, -    }); -} - -VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { -    return subrange_states[static_cast<std::size_t>(layer * image_num_levels) + -                           static_cast<std::size_t>(level)]; -} - -} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h deleted file mode 100644 index 287ab90ca..000000000 --- a/src/video_core/renderer_vulkan/vk_image.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <vector> - -#include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -class VKDevice; -class VKScheduler; - -class VKImage { -public: -    explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_, -                     const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_); -    ~VKImage(); - -    /// Records in the passed command buffer an image transition and updates the state of the image. -    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, -                    VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, -                    VkImageLayout new_layout); - -    /// Returns a view compatible with presentation, the image has to be 2D. -    VkImageView GetPresentView() { -        if (!present_view) { -            CreatePresentView(); -        } -        return *present_view; -    } - -    /// Returns the Vulkan image handler. -    const vk::Image& GetHandle() const { -        return image; -    } - -    /// Returns the Vulkan format for this image. -    VkFormat GetFormat() const { -        return format; -    } - -    /// Returns the Vulkan aspect mask. -    VkImageAspectFlags GetAspectMask() const { -        return aspect_mask; -    } - -private: -    struct SubrangeState final { -        VkAccessFlags access = 0;                         ///< Current access bits. -        VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. 
-    }; - -    bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, -                    VkAccessFlags new_access, VkImageLayout new_layout) noexcept; - -    /// Creates a presentation view. -    void CreatePresentView(); - -    /// Returns the subrange state for a layer and layer. -    SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; - -    const VKDevice& device; ///< Device handler. -    VKScheduler& scheduler; ///< Device scheduler. - -    const VkFormat format;                ///< Vulkan format. -    const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. -    const u32 image_num_layers;           ///< Number of layers. -    const u32 image_num_levels;           ///< Number of mipmap levels. - -    vk::Image image;            ///< Image handle. -    vk::ImageView present_view; ///< Image view compatible with presentation. - -    std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. -    std::vector<SubrangeState> subrange_states; ///< Current subrange state. - -    bool state_diverged = false; ///< True when subresources mismatch in layout. 
-}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index be53d450f..56b24b70f 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() {  }  MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { -    return MemoryMap{this, memory.Map(interval.first + offset_, size)}; +    return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));  }  void VKMemoryCommitImpl::Unmap() const { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 39f903ec8..318f8b43e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -5,6 +5,7 @@  #pragma once  #include <memory> +#include <span>  #include <utility>  #include <vector>  #include "common/common_types.h" @@ -93,8 +94,8 @@ private:  /// Holds ownership of a memory map.  class MemoryMap final {  public: -    explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) -        : commit{commit_}, address{address_} {} +    explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_) +        : commit{commit_}, span{span_} {}      ~MemoryMap() {          if (commit) { @@ -108,19 +109,24 @@ public:          commit = nullptr;      } +    /// Returns a span to the memory map. +    [[nodiscard]] std::span<u8> Span() const noexcept { +        return span; +    } +      /// Returns the address of the memory map. 
-    u8* GetAddress() const { -        return address; +    [[nodiscard]] u8* Address() const noexcept { +        return span.data();      }      /// Returns the address of the memory map; -    operator u8*() const { -        return address; +    [[nodiscard]] operator u8*() const noexcept { +        return span.data();      }  private:      const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. -    u8* address{};                      ///< Address to the mapped memory. +    std::span<u8> span;                 ///< Address to the mapped memory.  };  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3fb264d03..083796d05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -8,6 +8,7 @@  #include <vector>  #include "common/bit_cast.h" +#include "common/cityhash.h"  #include "common/microprofile.h"  #include "core/core.h"  #include "core/memory.h" @@ -22,7 +23,6 @@  #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"  #include "video_core/renderer_vulkan/vk_pipeline_cache.h"  #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h"  #include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/vk_update_descriptor.h"  #include "video_core/renderer_vulkan/wrapper.h" @@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX  constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;  constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ -    VideoCommon::Shader::CompileDepth::FullDecompile}; +    .depth = VideoCommon::Shader::CompileDepth::FullDecompile, +    .disable_else_derivation = true, +};  constexpr std::size_t GetStageFromProgram(std::size_t program) {      return program == 0 ? 
0 : program - 1; @@ -149,12 +151,11 @@ VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_                                   Tegra::Engines::KeplerCompute& kepler_compute_,                                   Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,                                   VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, -                                 VKUpdateDescriptorQueue& update_descriptor_queue_, -                                 VKRenderPassCache& renderpass_cache_) -    : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, -      gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, -      descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, -      renderpass_cache{renderpass_cache_} {} +                                 VKUpdateDescriptorQueue& update_descriptor_queue_) +    : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, +      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, +      scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ +                                                                    update_descriptor_queue_} {}  VKPipelineCache::~VKPipelineCache() = default; @@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {  }  VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( -    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { +    const GraphicsPipelineCacheKey& key, u32 num_color_buffers, +    VideoCommon::Shader::AsyncShaders& async_shaders) {      MICROPROFILE_SCOPE(Vulkan_PipelineCache);      if (last_graphics_pipeline && last_graphics_key == key) { @@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(              LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());              
const auto [program, bindings] = DecompileShaders(key.fixed_state);              async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, -                                            update_descriptor_queue, renderpass_cache, bindings, -                                            program, key); +                                            update_descriptor_queue, bindings, program, key, +                                            num_color_buffers);          }          last_graphics_pipeline = pair->second.get();          return last_graphics_pipeline; @@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(          LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());          const auto [program, bindings] = DecompileShaders(key.fixed_state);          entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, -                                                     update_descriptor_queue, renderpass_cache, key, -                                                     bindings, program); +                                                     update_descriptor_queue, key, bindings, +                                                     program, num_color_buffers);          gpu.ShaderNotify().MarkShaderComplete();      }      last_graphics_pipeline = entry.get(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 9e1f8fcbb..fbaa8257c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,7 +19,6 @@  #include "video_core/engines/maxwell_3d.h"  #include "video_core/renderer_vulkan/fixed_pipeline_state.h"  #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h"  #include "video_core/renderer_vulkan/vk_shader_decompiler.h"  #include "video_core/renderer_vulkan/wrapper.h"  #include 
"video_core/shader/async_shaders.h" @@ -119,18 +118,18 @@ private:  class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {  public: -    explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, -                             Tegra::Engines::Maxwell3D& maxwell3d_, -                             Tegra::Engines::KeplerCompute& kepler_compute_, -                             Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, -                             VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, -                             VKUpdateDescriptorQueue& update_descriptor_queue_, -                             VKRenderPassCache& renderpass_cache_); +    explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, +                             Tegra::Engines::Maxwell3D& maxwell3d, +                             Tegra::Engines::KeplerCompute& kepler_compute, +                             Tegra::MemoryManager& gpu_memory, const VKDevice& device, +                             VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, +                             VKUpdateDescriptorQueue& update_descriptor_queue);      ~VKPipelineCache() override;      std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();      VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, +                                            u32 num_color_buffers,                                              VideoCommon::Shader::AsyncShaders& async_shaders);      VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); @@ -153,7 +152,6 @@ private:      VKScheduler& scheduler;      VKDescriptorPool& descriptor_pool;      VKUpdateDescriptorQueue& update_descriptor_queue; -    VKRenderPassCache& renderpass_cache;      std::unique_ptr<Shader> null_shader;      std::unique_ptr<Shader> null_kernel; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f93986aab..04c5c859c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -19,6 +19,7 @@  #include "core/settings.h"  #include "video_core/engines/kepler_compute.h"  #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/blit_image.h"  #include "video_core/renderer_vulkan/fixed_pipeline_state.h"  #include "video_core/renderer_vulkan/maxwell_to_vk.h"  #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -30,8 +31,6 @@  #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"  #include "video_core/renderer_vulkan/vk_pipeline_cache.h"  #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h"  #include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"  #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -39,10 +38,13 @@  #include "video_core/renderer_vulkan/vk_update_descriptor.h"  #include "video_core/renderer_vulkan/wrapper.h"  #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h"  namespace Vulkan {  using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType;  MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));  MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); @@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192  namespace { -constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); +constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); -VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { 
+VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) {      const auto& src = regs.viewport_transform[index];      const float width = src.scale_x * 2.0f;      const float height = src.scale_y * 2.0f; @@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si      return viewport;  } -VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { +VkRect2D GetScissorState(const Maxwell& regs, size_t index) {      const auto& src = regs.scissor_test[index];      VkRect2D scissor;      if (src.enable) { @@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {  std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(      const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {      std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; -    for (std::size_t i = 0; i < std::size(addresses); ++i) { +    for (size_t i = 0; i < std::size(addresses); ++i) {          addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;      }      return addresses;  } -void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, -                      VkAccessFlags access) { -    for (auto& [view, layout] : views) { -        view->Transition(*layout, pipeline_stage, access); +struct TextureHandle { +    constexpr TextureHandle(u32 data, bool via_header_index) { +        const Tegra::Texture::TextureHandle handle{data}; +        image = handle.tic_id; +        sampler = via_header_index ? 
image : handle.tsc_id.Value();      } -} + +    u32 image; +    u32 sampler; +};  template <typename Engine, typename Entry> -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, -                                               std::size_t stage, std::size_t index = 0) { -    const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, +                             size_t stage, size_t index = 0) { +    const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);      if constexpr (std::is_same_v<Entry, SamplerEntry>) {          if (entry.is_separated) {              const u32 buffer_1 = entry.buffer;              const u32 buffer_2 = entry.secondary_buffer;              const u32 offset_1 = entry.offset;              const u32 offset_2 = entry.secondary_offset; -            const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); -            const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); -            return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); +            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); +            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); +            return TextureHandle(handle_1 | handle_2, via_header_index);          }      }      if (entry.is_bindless) { -        const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); -        return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); -    } -    const auto& gpu_profile = engine.AccessGuestDriverProfile(); -    const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); -    const u32 offset = entry.offset + entry_offset; -    if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { -    
    return engine.GetStageTexture(stage_type, offset); -    } else { -        return engine.GetTexture(offset); -    } -} - -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { -    if (!is_clear) { -        return true; -    } -    // First we have to make sure all clear masks are enabled. -    if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || -        !regs.clear_buffers.A) { -        return true; -    } -    // If scissors are disabled, the whole screen is cleared -    if (!regs.clear_flags.scissor) { -        return false; +        const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); +        return TextureHandle(raw, via_header_index);      } -    // Then we have to confirm scissor testing clears the whole image -    const std::size_t index = regs.clear_buffers.RT; -    const auto& scissor = regs.scissor_test[0]; -    return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || -           scissor.max_y < regs.rt[index].height; +    const u32 buffer = engine.GetBoundBuffer(); +    const u64 offset = (entry.offset + index) * sizeof(u32); +    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);  } -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { -    // If we are not clearing, the contents have to be preserved -    if (!is_clear) { -        return true; -    } -    // For depth stencil clears we only have to confirm scissor test covers the whole image -    
if (!regs.clear_flags.scissor) { -        return false; -    } -    // Make sure the clear cover the whole image -    const auto& scissor = regs.scissor_test[0]; -    return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || -           scissor.max_y < regs.zeta_height; -} - -template <std::size_t N> +template <size_t N>  std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {      std::array<VkDeviceSize, N> expanded;      std::copy(strides.begin(), strides.end(), expanded.begin());      return expanded;  } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { +    if (entry.is_buffer) { +        return ImageViewType::e2D; +    } +    switch (entry.type) { +    case Tegra::Shader::TextureType::Texture1D: +        return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; +    case Tegra::Shader::TextureType::Texture2D: +        return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; +    case Tegra::Shader::TextureType::Texture3D: +        return ImageViewType::e3D; +    case Tegra::Shader::TextureType::TextureCube: +        return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; +    } +    UNREACHABLE(); +    return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { +    switch (entry.type) { +    case Tegra::Shader::ImageType::Texture1D: +        return ImageViewType::e1D; +    case Tegra::Shader::ImageType::Texture1DArray: +        return ImageViewType::e1DArray; +    case Tegra::Shader::ImageType::Texture2D: +        return ImageViewType::e2D; +    case Tegra::Shader::ImageType::Texture2DArray: +        return ImageViewType::e2DArray; +    case Tegra::Shader::ImageType::Texture3D: +        return ImageViewType::e3D; +    case Tegra::Shader::ImageType::TextureBuffer: +        return ImageViewType::Buffer; +    } +    UNREACHABLE(); +    return ImageViewType::e2D; +} + +void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, +                          VKUpdateDescriptorQueue& update_descriptor_queue, +                          ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { +    for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { +        const ImageViewId image_view_id = *image_view_id_ptr++; +        const ImageView& image_view = texture_cache.GetImageView(image_view_id); +        update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); +    } +    for (const auto& entry : entries.samplers) { +        for (size_t i = 0; i < entry.size; ++i) { +            const VkSampler sampler = *sampler_ptr++; +            const ImageViewId image_view_id = *image_view_id_ptr++; +            const ImageView& image_view = texture_cache.GetImageView(image_view_id); +            const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); +            update_descriptor_queue.AddSampledImage(handle, sampler); +        } +    } +    for ([[maybe_unused]] const auto& entry : entries.storage_texels) { +        const ImageViewId image_view_id = *image_view_id_ptr++; +        const ImageView& 
image_view = texture_cache.GetImageView(image_view_id); +        update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); +    } +    for (const auto& entry : entries.images) { +        // TODO: Mark as modified +        const ImageViewId image_view_id = *image_view_id_ptr++; +        const ImageView& image_view = texture_cache.GetImageView(image_view_id); +        const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); +        update_descriptor_queue.AddImage(handle); +    } +} +  } // Anonymous namespace  class BufferBindings final { @@ -290,7 +316,7 @@ public:  private:      // Some of these fields are intentionally left uninitialized to avoid initializing them twice.      struct { -        std::size_t num_buffers = 0; +        size_t num_buffers = 0;          std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;          std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;          std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; @@ -303,7 +329,7 @@ private:          VkIndexType type;      } index; -    template <std::size_t N> +    template <size_t N>      void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {          if (device.IsExtExtendedDynamicStateSupported()) {              if (index.buffer) { @@ -320,7 +346,7 @@ private:          }      } -    template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> +    template <size_t N, bool is_indexed, bool has_extended_dynamic_state>      void BindStatic(VKScheduler& scheduler) const {          static_assert(N <= Maxwell::NumVertexArrays);          if constexpr (N == 0) { @@ -385,20 +411,23 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra                                     Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,                                     const VKDevice& device_, VKMemoryManager& memory_manager_,                                     StateTracker& state_tracker_, 
VKScheduler& scheduler_) -    : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), -      maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), -      device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), -      scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), -      descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), -      renderpass_cache(device), +    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, +      gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, +      screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, +      state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), +      staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), +      update_descriptor_queue(device, scheduler), +      blit_image(device, scheduler, state_tracker, descriptor_pool),        quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),        quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),        uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), -      texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), +      texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, +      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),        pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, -                     descriptor_pool, update_descriptor_queue, renderpass_cache), -      buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), -      sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), +                   
  descriptor_pool, update_descriptor_queue), +      buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, +                   staging_pool), +      query_cache{*this, maxwell3d, gpu_memory, device, scheduler},        fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,                      scheduler),        wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { @@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {      const DrawParameters draw_params =          SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); -    update_descriptor_queue.Acquire(); -    sampled_views.clear(); -    image_views.clear(); +    auto lock = texture_cache.AcquireLock(); +    texture_cache.SynchronizeGraphicsDescriptors(); + +    texture_cache.UpdateRenderTargets(false);      const auto shaders = pipeline_cache.GetShaders();      key.shaders = GetShaderAddresses(shaders); @@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {      buffer_cache.Unmap(); -    const Texceptions texceptions = UpdateAttachments(false); -    SetupImageTransitions(texceptions, color_attachments, zeta_attachment); - -    key.renderpass_params = GetRenderPassParams(texceptions); -    key.padding = 0; +    const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); +    key.renderpass = framebuffer->RenderPass(); -    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); +    auto* const pipeline = +        pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);      if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {          // Async graphics pipeline was not ready.          
return;      } -    scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - -    const auto renderpass = pipeline->GetRenderPass(); -    const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); -    scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - -    UpdateDynamicStates(); -      buffer_bindings.Bind(device, scheduler);      BeginTransformFeedback(); +    scheduler.RequestRenderpass(framebuffer); +    scheduler.BindGraphicsPipeline(pipeline->GetHandle()); +    UpdateDynamicStates(); +      const auto pipeline_layout = pipeline->GetLayout();      const auto descriptor_set = pipeline->CommitDescriptorSet();      scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { @@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() {          return;      } -    sampled_views.clear(); -    image_views.clear(); -      query_cache.UpdateCounters();      const auto& regs = maxwell3d.regs; @@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() {          return;      } -    [[maybe_unused]] const auto texceptions = UpdateAttachments(true); -    DEBUG_ASSERT(texceptions.none()); -    SetupImageTransitions(0, color_attachments, zeta_attachment); +    auto lock = texture_cache.AcquireLock(); +    texture_cache.UpdateRenderTargets(true); +    const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); +    const VkExtent2D render_area = framebuffer->RenderArea(); +    scheduler.RequestRenderpass(framebuffer); -    const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); -    const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); -    scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - -    VkClearRect clear_rect; -    clear_rect.baseArrayLayer = regs.clear_buffers.layer; -    clear_rect.layerCount = 1; -    clear_rect.rect = GetScissorState(regs, 0); -    clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, 
render_area.width); -    clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); +    VkClearRect clear_rect{ +        .rect = GetScissorState(regs, 0), +        .baseArrayLayer = regs.clear_buffers.layer, +        .layerCount = 1, +    }; +    if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) { +        return; +    } +    clear_rect.rect.extent = VkExtent2D{ +        .width = std::min(clear_rect.rect.extent.width, render_area.width), +        .height = std::min(clear_rect.rect.extent.height, render_area.height), +    };      if (use_color) {          VkClearValue clear_value; @@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() {  void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {      MICROPROFILE_SCOPE(Vulkan_Compute); -    update_descriptor_queue.Acquire(); -    sampled_views.clear(); -    image_views.clear();      query_cache.UpdateCounters(); @@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {      // Compute dispatches can't be executed inside a renderpass      scheduler.RequestOutsideRenderPassOperationContext(); -    buffer_cache.Map(CalculateComputeStreamBufferSize()); +    image_view_indices.clear(); +    sampler_handles.clear(); + +    auto lock = texture_cache.AcquireLock(); +    texture_cache.SynchronizeComputeDescriptors();      const auto& entries = pipeline.GetEntries(); -    SetupComputeConstBuffers(entries); -    SetupComputeGlobalBuffers(entries);      SetupComputeUniformTexels(entries);      SetupComputeTextures(entries);      SetupComputeStorageTexels(entries);      SetupComputeImages(entries); -    buffer_cache.Unmap(); +    const std::span indices_span(image_view_indices.data(), image_view_indices.size()); +    texture_cache.FillComputeImageViews(indices_span, image_view_ids); -    TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, -                     VK_ACCESS_SHADER_READ_BIT); -    TransitionImages(image_views, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, -                     VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); +    buffer_cache.Map(CalculateComputeStreamBufferSize()); +    update_descriptor_queue.Acquire(); + +    SetupComputeConstBuffers(entries); +    SetupComputeGlobalBuffers(entries); + +    ImageViewId* image_view_id_ptr = image_view_ids.data(); +    VkSampler* sampler_ptr = sampler_handles.data(); +    PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, +                         sampler_ptr); + +    buffer_cache.Unmap(); + +    const VkPipeline pipeline_handle = pipeline.GetHandle(); +    const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); +    const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();      scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, -                      grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), -                      layout = pipeline.GetLayout(), -                      descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { +                      grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, +                      descriptor_set](vk::CommandBuffer cmdbuf) {          cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); -        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET,                                    descriptor_set, {});          cmdbuf.Dispatch(grid_x, grid_y, grid_z);      }); @@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {      if (addr == 0 || size == 0) {          return;      } -    texture_cache.FlushRegion(addr, size); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.DownloadMemory(addr, size); +    }      
buffer_cache.FlushRegion(addr, size);      query_cache.FlushRegion(addr, size);  } @@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {      if (!Settings::IsGPULevelHigh()) {          return buffer_cache.MustFlushRegion(addr, size);      } -    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); +    return texture_cache.IsRegionGpuModified(addr, size) || +           buffer_cache.MustFlushRegion(addr, size);  }  void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {      if (addr == 0 || size == 0) {          return;      } -    texture_cache.InvalidateRegion(addr, size); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.WriteMemory(addr, size); +    }      pipeline_cache.InvalidateRegion(addr, size);      buffer_cache.InvalidateRegion(addr, size);      query_cache.InvalidateRegion(addr, size); @@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {      if (addr == 0 || size == 0) {          return;      } -    texture_cache.OnCPUWrite(addr, size); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.WriteMemory(addr, size); +    }      pipeline_cache.OnCPUWrite(addr, size);      buffer_cache.OnCPUWrite(addr, size);  }  void RasterizerVulkan::SyncGuestHost() { -    texture_cache.SyncGuestHost();      buffer_cache.SyncGuestHost();      pipeline_cache.SyncGuestHost();  } +void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.UnmapMemory(addr, size); +    } +    buffer_cache.OnCPUWrite(addr, size); +    pipeline_cache.OnCPUWrite(addr, size); +} +  void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {      if (!gpu.IsAsync()) {          gpu_memory.Write<u32>(addr, value); @@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() {      });  } +void RasterizerVulkan::FragmentBarrier() { +    // We already put 
barriers when a render pass finishes +} + +void RasterizerVulkan::TiledCacheBarrier() { +    // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend +} +  void RasterizerVulkan::FlushCommands() {      if (draw_counter > 0) {          draw_counter = 0; @@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() {  void RasterizerVulkan::TickFrame() {      draw_counter = 0;      update_descriptor_queue.TickFrame(); +    fence_manager.TickFrame();      buffer_cache.TickFrame();      staging_pool.TickFrame(); +    { +        auto lock = texture_cache.AcquireLock(); +        texture_cache.TickFrame(); +    }  } -bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, -                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, +                                             const Tegra::Engines::Fermi2D::Surface& dst,                                               const Tegra::Engines::Fermi2D::Config& copy_config) { -    texture_cache.DoFermiCopy(src, dst, copy_config); +    auto lock = texture_cache.AcquireLock(); +    texture_cache.BlitImage(dst, src, copy_config);      return true;  } @@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,          return false;      } -    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; -    if (!surface) { +    auto lock = texture_cache.AcquireLock(); +    ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); +    if (!image_view) {          return false;      } -    // Verify that the cached surface is the same size and format as the requested framebuffer -    const auto& params{surface->GetSurfaceParams()}; -    ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); -    ASSERT_MSG(params.height == 
config.height, "Framebuffer height is different"); - -    screen_info.image = &surface->GetImage(); -    screen_info.width = params.width; -    screen_info.height = params.height; -    screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; +    screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); +    screen_info.width = image_view->size.width; +    screen_info.height = image_view->size.height; +    screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);      return true;  } @@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() {      draw_counter = 0;  } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { -    MICROPROFILE_SCOPE(Vulkan_RenderTargets); - -    const auto& regs = maxwell3d.regs; -    auto& dirty = maxwell3d.dirty.flags; -    const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; -    dirty[VideoCommon::Dirty::RenderTargets] = false; - -    texture_cache.GuardRenderTargets(true); - -    Texceptions texceptions; -    for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { -        if (update_rendertargets) { -            const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); -            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); -        } -        if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { -            texceptions[rt] = true; -        } -    } - -    if (update_rendertargets) { -        const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); -        zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); -    } -    if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { -        texceptions[ZETA_TEXCEPTION_INDEX] = true; -    } - -    texture_cache.GuardRenderTargets(false); - -    return texceptions; -} - -bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& 
attachment) { -    bool overlap = false; -    for (auto& [view, layout] : sampled_views) { -        if (!attachment.IsSameSurface(*view)) { -            continue; -        } -        overlap = true; -        *layout = VK_IMAGE_LAYOUT_GENERAL; -    } -    return overlap; -} - -std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( -    VkRenderPass renderpass) { -    FramebufferCacheKey key{ -        .renderpass = renderpass, -        .width = std::numeric_limits<u32>::max(), -        .height = std::numeric_limits<u32>::max(), -        .layers = std::numeric_limits<u32>::max(), -        .views = {}, -    }; - -    const auto try_push = [&key](const View& view) { -        if (!view) { -            return false; -        } -        key.views.push_back(view->GetAttachment()); -        key.width = std::min(key.width, view->GetWidth()); -        key.height = std::min(key.height, view->GetHeight()); -        key.layers = std::min(key.layers, view->GetNumLayers()); -        return true; -    }; - -    const auto& regs = maxwell3d.regs; -    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); -    for (std::size_t index = 0; index < num_attachments; ++index) { -        if (try_push(color_attachments[index])) { -            texture_cache.MarkColorBufferInUse(index); -        } -    } -    if (try_push(zeta_attachment)) { -        texture_cache.MarkDepthBufferInUse(); -    } - -    const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); -    auto& framebuffer = fbentry->second; -    if (is_cache_miss) { -        framebuffer = device.GetLogical().CreateFramebuffer({ -            .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, -            .pNext = nullptr, -            .flags = 0, -            .renderPass = key.renderpass, -            .attachmentCount = static_cast<u32>(key.views.size()), -            .pAttachments = key.views.data(), -            .width = key.width, -            .height = 
key.height, -            .layers = key.layers, -        }); -    } - -    return {*framebuffer, VkExtent2D{key.width, key.height}}; -} -  RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,                                                                   BufferBindings& buffer_bindings,                                                                   bool is_indexed, @@ -885,50 +852,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt  void RasterizerVulkan::SetupShaderDescriptors(      const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { -    texture_cache.GuardSamplers(true); - -    for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { -        // Skip VertexA stage +    image_view_indices.clear(); +    sampler_handles.clear(); +    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {          Shader* const shader = shaders[stage + 1];          if (!shader) {              continue;          }          const auto& entries = shader->GetEntries(); -        SetupGraphicsConstBuffers(entries, stage); -        SetupGraphicsGlobalBuffers(entries, stage);          SetupGraphicsUniformTexels(entries, stage);          SetupGraphicsTextures(entries, stage);          SetupGraphicsStorageTexels(entries, stage);          SetupGraphicsImages(entries, stage);      } -    texture_cache.GuardSamplers(false); -} +    const std::span indices_span(image_view_indices.data(), image_view_indices.size()); +    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); -void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, -                                             const ZetaAttachment& zeta) { -    TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); -    TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, -                     
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); +    update_descriptor_queue.Acquire(); -    for (std::size_t rt = 0; rt < color.size(); ++rt) { -        const auto color_attachment = color[rt]; -        if (color_attachment == nullptr) { +    ImageViewId* image_view_id_ptr = image_view_ids.data(); +    VkSampler* sampler_ptr = sampler_handles.data(); +    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { +        // Skip VertexA stage +        Shader* const shader = shaders[stage + 1]; +        if (!shader) {              continue;          } -        const auto image_layout = -            texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; -        color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, -                                     VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | -                                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); -    } - -    if (zeta != nullptr) { -        const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] -                                      ? 
VK_IMAGE_LAYOUT_GENERAL -                                      : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; -        zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, -                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | -                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); +        const auto& entries = shader->GetEntries(); +        SetupGraphicsConstBuffers(entries, stage); +        SetupGraphicsGlobalBuffers(entries, stage); +        PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, +                             sampler_ptr);      }  } @@ -1000,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() {  void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {      const auto& regs = maxwell3d.regs; -    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { +    for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {          const auto& vertex_array = regs.vertex_array[index];          if (!vertex_array.IsEnabled()) {              continue; @@ -1009,7 +963,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {          const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};          ASSERT(end >= start); -        const std::size_t size = end - start; +        const size_t size = end - start;          if (size == 0) {              buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);              continue; @@ -1070,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar      }  } -void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {      MICROPROFILE_SCOPE(Vulkan_ConstBuffers);      const auto& shader_stage = maxwell3d.state.shader_stages[stage];      for (const 
auto& entry : entries.const_buffers) { @@ -1078,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s      }  } -void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {      MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);      const auto& cbufs{maxwell3d.state.shader_stages[stage]}; @@ -1088,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,      }  } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {      MICROPROFILE_SCOPE(Vulkan_Textures); +    const auto& regs = maxwell3d.regs; +    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;      for (const auto& entry : entries.uniform_texels) { -        const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; -        SetupUniformTexels(image, entry); +        const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); +        image_view_indices.push_back(handle.image);      }  } -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {      MICROPROFILE_SCOPE(Vulkan_Textures); +    const auto& regs = maxwell3d.regs; +    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;      for (const auto& entry : entries.samplers) { -        for (std::size_t i = 0; i < entry.size; ++i) { -            const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); -            SetupTexture(texture, entry); +        for (size_t index = 0; index < entry.size; ++index) { +            const TextureHandle handle = +                
GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); +            image_view_indices.push_back(handle.image); + +            Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); +            sampler_handles.push_back(sampler->Handle());          }      }  } -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {      MICROPROFILE_SCOPE(Vulkan_Textures); +    const auto& regs = maxwell3d.regs; +    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;      for (const auto& entry : entries.storage_texels) { -        const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; -        SetupStorageTexel(image, entry); +        const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); +        image_view_indices.push_back(handle.image);      }  } -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {      MICROPROFILE_SCOPE(Vulkan_Images); +    const auto& regs = maxwell3d.regs; +    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;      for (const auto& entry : entries.images) { -        const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; -        SetupImage(tic, entry); +        const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); +        image_view_indices.push_back(handle.image);      }  } @@ -1128,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {      for (const auto& entry : entries.const_buffers) {          const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];          const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); -  
      Tegra::Engines::ConstBufferInfo buffer; -        buffer.address = config.Address(); -        buffer.size = config.size; -        buffer.enabled = mask[entry.GetIndex()]; -        SetupConstBuffer(entry, buffer); +        const Tegra::Engines::ConstBufferInfo info{ +            .address = config.Address(), +            .size = config.size, +            .enabled = mask[entry.GetIndex()], +        }; +        SetupConstBuffer(entry, info);      }  } @@ -1147,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {  void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {      MICROPROFILE_SCOPE(Vulkan_Textures); +    const bool via_header_index = kepler_compute.launch_description.linked_tsc;      for (const auto& entry : entries.uniform_texels) { -        const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; -        SetupUniformTexels(image, entry); +        const TextureHandle handle = +            GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); +        image_view_indices.push_back(handle.image);      }  }  void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {      MICROPROFILE_SCOPE(Vulkan_Textures); +    const bool via_header_index = kepler_compute.launch_description.linked_tsc;      for (const auto& entry : entries.samplers) { -        for (std::size_t i = 0; i < entry.size; ++i) { -            const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); -            SetupTexture(texture, entry); +        for (size_t index = 0; index < entry.size; ++index) { +            const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, +                                                        COMPUTE_SHADER_INDEX, index); +            image_view_indices.push_back(handle.image); + +            Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); +          
  sampler_handles.push_back(sampler->Handle());          }      }  }  void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {      MICROPROFILE_SCOPE(Vulkan_Textures); +    const bool via_header_index = kepler_compute.launch_description.linked_tsc;      for (const auto& entry : entries.storage_texels) { -        const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; -        SetupStorageTexel(image, entry); +        const TextureHandle handle = +            GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); +        image_view_indices.push_back(handle.image);      }  }  void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {      MICROPROFILE_SCOPE(Vulkan_Images); +    const bool via_header_index = kepler_compute.launch_description.linked_tsc;      for (const auto& entry : entries.images) { -        const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; -        SetupImage(tic, entry); +        const TextureHandle handle = +            GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); +        image_view_indices.push_back(handle.image);      }  } @@ -1186,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,          update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);          return;      } -      // Align the size to avoid bad std140 interactions -    const std::size_t size = -        Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); +    const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));      ASSERT(size <= MaxConstbufferSize); -    const auto info = -        buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); +    const u64 alignment = device.GetUniformBufferAlignment(); +    const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);  
    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);  } @@ -1206,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd          // because Vulkan doesn't like empty buffers.          // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the          // default buffer. -        static constexpr std::size_t dummy_size = 4; +        static constexpr size_t dummy_size = 4;          const auto info = buffer_cache.GetEmptyBuffer(dummy_size);          update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);          return; @@ -1217,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd      update_descriptor_queue.AddBuffer(info.handle, info.offset, size);  } -void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, -                                          const UniformTexelEntry& entry) { -    const auto view = texture_cache.GetTextureSurface(tic, entry); -    ASSERT(view->IsBufferView()); - -    update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, -                                    const SamplerEntry& entry) { -    auto view = texture_cache.GetTextureSurface(texture.tic, entry); -    ASSERT(!view->IsBufferView()); - -    const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, -                                                      texture.tic.z_source, texture.tic.w_source); -    const auto sampler = sampler_cache.GetSampler(texture.tsc); -    update_descriptor_queue.AddSampledImage(sampler, image_view); - -    VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); -    *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -    sampled_views.push_back(ImageView{std::move(view), image_layout}); -} - -void 
RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, -                                         const StorageTexelEntry& entry) { -    const auto view = texture_cache.GetImageSurface(tic, entry); -    ASSERT(view->IsBufferView()); - -    update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { -    auto view = texture_cache.GetImageSurface(tic, entry); - -    if (entry.is_written) { -        view->MarkAsModified(texture_cache.Tick()); -    } - -    UNIMPLEMENTED_IF(tic.IsBuffer()); - -    const VkImageView image_view = -        view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); -    update_descriptor_queue.AddImage(image_view); - -    VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); -    *image_layout = VK_IMAGE_LAYOUT_GENERAL; -    image_views.push_back(ImageView{std::move(view), image_layout}); -} -  void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {      if (!state_tracker.TouchViewports()) {          return; @@ -1457,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&      });  } -std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { -    std::size_t size = CalculateVertexArraysSize(); +size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { +    size_t size = CalculateVertexArraysSize();      if (is_indexed) {          size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();      } @@ -1466,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed)      return size;  } -std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { +size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {      return Tegra::Engines::KeplerCompute::NumConstBuffers *             
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());  } -std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { +size_t RasterizerVulkan::CalculateVertexArraysSize() const {      const auto& regs = maxwell3d.regs; -    std::size_t size = 0; +    size_t size = 0;      for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {          // This implementation assumes that all attributes are used in the shader.          const GPUVAddr start{regs.vertex_array[index].StartAddress()}; @@ -1486,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {      return size;  } -std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { -    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * -           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); +size_t RasterizerVulkan::CalculateIndexBufferSize() const { +    return static_cast<size_t>(maxwell3d.regs.index_array.count) * +           static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());  } -std::size_t RasterizerVulkan::CalculateConstBufferSize( +size_t RasterizerVulkan::CalculateConstBufferSize(      const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {      if (entry.IsIndirect()) {          // Buffer is accessed indirectly, so upload the entire thing @@ -1502,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(      }  } -RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { -    const auto& regs = maxwell3d.regs; -    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); - -    RenderPassParams params; -    params.color_formats = {}; -    std::size_t color_texceptions = 0; - -    std::size_t index = 0; -    for (std::size_t rt = 0; rt < num_attachments; ++rt) { -        const auto& rendertarget = regs.rt[rt]; -        if (rendertarget.Address() == 0 || rendertarget.format == 
Tegra::RenderTargetFormat::NONE) { -            continue; -        } -        params.color_formats[index] = static_cast<u8>(rendertarget.format); -        color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; -        ++index; -    } -    params.num_color_attachments = static_cast<u8>(index); -    params.texceptions = static_cast<u8>(color_texceptions); - -    params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0; -    params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; -    return params; -} -  VkBuffer RasterizerVulkan::DefaultBuffer() {      if (default_buffer) {          return *default_buffer;      } -      default_buffer = device.GetLogical().CreateBuffer({          .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,          .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 30ec58eb4..990f9e031 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -11,11 +11,11 @@  #include <vector>  #include <boost/container/static_vector.hpp> -#include <boost/functional/hash.hpp>  #include "common/common_types.h"  #include "video_core/rasterizer_accelerated.h"  #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/blit_image.h"  #include "video_core/renderer_vulkan/fixed_pipeline_state.h"  #include "video_core/renderer_vulkan/vk_buffer_cache.h"  #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -24,10 +24,9 @@  #include "video_core/renderer_vulkan/vk_memory_manager.h"  #include "video_core/renderer_vulkan/vk_pipeline_cache.h"  #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h"  #include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include 
"video_core/renderer_vulkan/vk_stream_buffer.h"  #include "video_core/renderer_vulkan/vk_texture_cache.h"  #include "video_core/renderer_vulkan/vk_update_descriptor.h"  #include "video_core/renderer_vulkan/wrapper.h" @@ -49,60 +48,9 @@ namespace Vulkan {  struct VKScreenInfo; -using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; - -struct FramebufferCacheKey { -    VkRenderPass renderpass{}; -    u32 width = 0; -    u32 height = 0; -    u32 layers = 0; -    ImageViewsPack views; - -    std::size_t Hash() const noexcept { -        std::size_t hash = 0; -        boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); -        for (const auto& view : views) { -            boost::hash_combine(hash, static_cast<VkImageView>(view)); -        } -        boost::hash_combine(hash, width); -        boost::hash_combine(hash, height); -        boost::hash_combine(hash, layers); -        return hash; -    } - -    bool operator==(const FramebufferCacheKey& rhs) const noexcept { -        return std::tie(renderpass, views, width, height, layers) == -               std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); -    } - -    bool operator!=(const FramebufferCacheKey& rhs) const noexcept { -        return !operator==(rhs); -    } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::FramebufferCacheKey> { -    std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { -        return k.Hash(); -    } -}; - -} // namespace std - -namespace Vulkan { -  class StateTracker;  class BufferBindings; -struct ImageView { -    View view; -    VkImageLayout* layout = nullptr; -}; -  class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {  public:      explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -123,15 +71,18 @@ public:      void InvalidateRegion(VAddr addr, u64 size) override;      void OnCPUWrite(VAddr 
addr, u64 size) override;      void SyncGuestHost() override; +    void UnmapMemory(VAddr addr, u64 size) override;      void SignalSemaphore(GPUVAddr addr, u32 value) override;      void SignalSyncPoint(u32 value) override;      void ReleaseFences() override;      void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void WaitForIdle() override; +    void FragmentBarrier() override; +    void TiledCacheBarrier() override;      void FlushCommands() override;      void TickFrame() override; -    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, -                               const Tegra::Engines::Fermi2D::Regs::Surface& dst, +    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, +                               const Tegra::Engines::Fermi2D::Surface& dst,                                 const Tegra::Engines::Fermi2D::Config& copy_config) override;      bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,                             u32 pixel_stride) override; @@ -145,11 +96,17 @@ public:      }      /// Maximum supported size that a constbuffer can have in bytes. 
-    static constexpr std::size_t MaxConstbufferSize = 0x10000; +    static constexpr size_t MaxConstbufferSize = 0x10000;      static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,                    "The maximum size of a constbuffer must be a multiple of the size of GLvec4");  private: +    static constexpr size_t MAX_TEXTURES = 192; +    static constexpr size_t MAX_IMAGES = 48; +    static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + +    static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); +      struct DrawParameters {          void Draw(vk::CommandBuffer cmdbuf) const; @@ -160,23 +117,8 @@ private:          bool is_indexed = 0;      }; -    using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>; -    using ZetaAttachment = View; - -    using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; - -    static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; -    static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); -      void FlushWork(); -    /// @brief Updates the currently bound attachments -    /// @param is_clear True when the framebuffer is updated as a clear -    /// @return Bitfield of attachments being used as sampled textures -    Texceptions UpdateAttachments(bool is_clear); - -    std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); -      /// Setups geometry buffers and state.      DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,                                   bool is_indexed, bool is_instanced); @@ -184,17 +126,12 @@ private:      /// Setup descriptors in the graphics pipeline.      
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); -    void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, -                               const ZetaAttachment& zeta); -      void UpdateDynamicStates();      void BeginTransformFeedback();      void EndTransformFeedback(); -    bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); -      void SetupVertexArrays(BufferBindings& buffer_bindings);      void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -240,14 +177,6 @@ private:      void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); -    void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); - -    void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); - -    void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); - -    void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); -      void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);      void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);      void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -264,18 +193,16 @@ private:      void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);      void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); -    std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - -    std::size_t CalculateComputeStreamBufferSize() const; +    size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; -    std::size_t CalculateVertexArraysSize() const; +    size_t CalculateComputeStreamBufferSize() const; -    std::size_t CalculateIndexBufferSize() const; +    size_t CalculateVertexArraysSize() const; -    std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, -                                         const 
Tegra::Engines::ConstBufferInfo& buffer) const; +    size_t CalculateIndexBufferSize() const; -    RenderPassParams GetRenderPassParams(Texceptions texceptions) const; +    size_t CalculateConstBufferSize(const ConstBufferEntry& entry, +                                    const Tegra::Engines::ConstBufferInfo& buffer) const;      VkBuffer DefaultBuffer(); @@ -290,18 +217,19 @@ private:      StateTracker& state_tracker;      VKScheduler& scheduler; +    VKStreamBuffer stream_buffer;      VKStagingBufferPool staging_pool;      VKDescriptorPool descriptor_pool;      VKUpdateDescriptorQueue update_descriptor_queue; -    VKRenderPassCache renderpass_cache; +    BlitImageHelper blit_image;      QuadArrayPass quad_array_pass;      QuadIndexedPass quad_indexed_pass;      Uint8Pass uint8_pass; -    VKTextureCache texture_cache; +    TextureCacheRuntime texture_cache_runtime; +    TextureCache texture_cache;      VKPipelineCache pipeline_cache;      VKBufferCache buffer_cache; -    VKSamplerCache sampler_cache;      VKQueryCache query_cache;      VKFenceManager fence_manager; @@ -310,16 +238,11 @@ private:      vk::Event wfi_event;      VideoCommon::Shader::AsyncShaders async_shaders; -    ColorAttachments color_attachments; -    ZetaAttachment zeta_attachment; - -    std::vector<ImageView> sampled_views; -    std::vector<ImageView> image_views; +    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; +    std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; +    boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;      u32 draw_counter = 0; - -    // TODO(Rodrigo): Invalidate on image destruction -    std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;  };  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp deleted file mode 100644 index e812c7dd6..000000000 --- 
a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <memory> -#include <vector> - -#include "common/cityhash.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -std::size_t RenderPassParams::Hash() const noexcept { -    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); -    return static_cast<std::size_t>(hash); -} - -bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { -    return std::memcmp(&rhs, this, sizeof *this) == 0; -} - -VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {} - -VKRenderPassCache::~VKRenderPassCache() = default; - -VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { -    const auto [pair, is_cache_miss] = cache.try_emplace(params); -    auto& entry = pair->second; -    if (is_cache_miss) { -        entry = CreateRenderPass(params); -    } -    return *entry; -} - -vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { -    using namespace VideoCore::Surface; -    const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); - -    std::vector<VkAttachmentDescription> descriptors; -    descriptors.reserve(num_attachments); - -    std::vector<VkAttachmentReference> color_references; -    color_references.reserve(num_attachments); - -    for (std::size_t rt = 0; rt < num_attachments; ++rt) { -        const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); -        const PixelFormat pixel_format = 
PixelFormatFromRenderTargetFormat(guest_format); -        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); -        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", -                   static_cast<int>(pixel_format)); - -        // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. -        const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 -                                               ? VK_IMAGE_LAYOUT_GENERAL -                                               : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; -        descriptors.push_back({ -            .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, -            .format = format.format, -            .samples = VK_SAMPLE_COUNT_1_BIT, -            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, -            .storeOp = VK_ATTACHMENT_STORE_OP_STORE, -            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, -            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, -            .initialLayout = color_layout, -            .finalLayout = color_layout, -        }); - -        color_references.push_back({ -            .attachment = static_cast<u32>(rt), -            .layout = color_layout, -        }); -    } - -    VkAttachmentReference zeta_attachment_ref; -    const bool has_zeta = params.zeta_format != 0; -    if (has_zeta) { -        const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format); -        const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); -        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); -        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", -                   static_cast<int>(pixel_format)); - -        const VkImageLayout zeta_layout = params.zeta_texception != 0 -                                              ? 
VK_IMAGE_LAYOUT_GENERAL -                                              : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; -        descriptors.push_back({ -            .flags = 0, -            .format = format.format, -            .samples = VK_SAMPLE_COUNT_1_BIT, -            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, -            .storeOp = VK_ATTACHMENT_STORE_OP_STORE, -            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, -            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, -            .initialLayout = zeta_layout, -            .finalLayout = zeta_layout, -        }); - -        zeta_attachment_ref = { -            .attachment = static_cast<u32>(num_attachments), -            .layout = zeta_layout, -        }; -    } - -    const VkSubpassDescription subpass_description{ -        .flags = 0, -        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, -        .inputAttachmentCount = 0, -        .pInputAttachments = nullptr, -        .colorAttachmentCount = static_cast<u32>(color_references.size()), -        .pColorAttachments = color_references.data(), -        .pResolveAttachments = nullptr, -        .pDepthStencilAttachment = has_zeta ? 
&zeta_attachment_ref : nullptr, -        .preserveAttachmentCount = 0, -        .pPreserveAttachments = nullptr, -    }; - -    VkAccessFlags access = 0; -    VkPipelineStageFlags stage = 0; -    if (!color_references.empty()) { -        access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; -        stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -    } - -    if (has_zeta) { -        access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | -                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; -        stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; -    } - -    const VkSubpassDependency subpass_dependency{ -        .srcSubpass = VK_SUBPASS_EXTERNAL, -        .dstSubpass = 0, -        .srcStageMask = stage, -        .dstStageMask = stage, -        .srcAccessMask = 0, -        .dstAccessMask = access, -        .dependencyFlags = 0, -    }; - -    return device.GetLogical().CreateRenderPass({ -        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .attachmentCount = static_cast<u32>(descriptors.size()), -        .pAttachments = descriptors.data(), -        .subpassCount = 1, -        .pSubpasses = &subpass_description, -        .dependencyCount = 1, -        .pDependencies = &subpass_dependency, -    }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h deleted file mode 100644 index 652ecef7b..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <type_traits> -#include <unordered_map> - -#include <boost/container/static_vector.hpp> -#include <boost/functional/hash.hpp> - -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" - -namespace Vulkan { - -class VKDevice; - -struct RenderPassParams { -    std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats; -    u8 num_color_attachments; -    u8 texceptions; - -    u8 zeta_format; -    u8 zeta_texception; - -    std::size_t Hash() const noexcept; - -    bool operator==(const RenderPassParams& rhs) const noexcept; - -    bool operator!=(const RenderPassParams& rhs) const noexcept { -        return !operator==(rhs); -    } -}; -static_assert(std::has_unique_object_representations_v<RenderPassParams>); -static_assert(std::is_trivially_copyable_v<RenderPassParams>); -static_assert(std::is_trivially_constructible_v<RenderPassParams>); - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::RenderPassParams> { -    std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { -        return k.Hash(); -    } -}; - -} // namespace std - -namespace Vulkan { - -class VKRenderPassCache final { -public: -    explicit VKRenderPassCache(const VKDevice& device_); -    ~VKRenderPassCache(); - -    VkRenderPass GetRenderPass(const RenderPassParams& params); - -private: -    vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; - -    const VKDevice& device; -    std::unordered_map<RenderPassParams, vk::RenderPass> cache; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp deleted file mode 100644 index b859691fa..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer 
to the license.txt file included. - -#include <unordered_map> - -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/textures/texture.h" - -using Tegra::Texture::TextureMipmapFilter; - -namespace Vulkan { - -namespace { - -VkBorderColor ConvertBorderColor(std::array<float, 4> color) { -    // TODO(Rodrigo): Manage integer border colors -    if (color == std::array<float, 4>{0, 0, 0, 0}) { -        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; -    } else if (color == std::array<float, 4>{0, 0, 0, 1}) { -        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; -    } else if (color == std::array<float, 4>{1, 1, 1, 1}) { -        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; -    } -    if (color[0] + color[1] + color[2] > 1.35f) { -        // If color elements are brighter than roughly 0.5 average, use white border -        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; -    } else if (color[3] > 0.5f) { -        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; -    } else { -        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; -    } -} - -} // Anonymous namespace - -VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {} - -VKSamplerCache::~VKSamplerCache() = default; - -vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { -    const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); -    const std::array color = tsc.GetBorderColor(); - -    VkSamplerCustomBorderColorCreateInfoEXT border{ -        .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, -        .pNext = nullptr, -        .customBorderColor = {}, -        .format = VK_FORMAT_UNDEFINED, -    }; -    std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - -    return device.GetLogical().CreateSampler({ -        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, -        .pNext = 
arbitrary_borders ? &border : nullptr, -        .flags = 0, -        .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), -        .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), -        .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), -        .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), -        .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), -        .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), -        .mipLodBias = tsc.GetLodBias(), -        .anisotropyEnable = -            static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), -        .maxAnisotropy = tsc.GetMaxAnisotropy(), -        .compareEnable = tsc.depth_compare_enabled, -        .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), -        .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), -        .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), -        .borderColor = -            arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), -        .unnormalizedCoordinates = VK_FALSE, -    }); -} - -VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { -    return *sampler; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h deleted file mode 100644 index 3f22c4610..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/sampler_cache.h" -#include "video_core/textures/texture.h" - -namespace Vulkan { - -class VKDevice; - -class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { -public: -    explicit VKSamplerCache(const VKDevice& device_); -    ~VKSamplerCache(); - -protected: -    vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - -    VkSampler ToSamplerType(const vk::Sampler& sampler) const override; - -private: -    const VKDevice& device; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1a483dc71..c104c6fe3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -16,6 +16,7 @@  #include "video_core/renderer_vulkan/vk_query_cache.h"  #include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h"  #include "video_core/renderer_vulkan/wrapper.h"  namespace Vulkan { @@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() {      AcquireNewChunk();  } -void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, -                                    VkExtent2D render_area) { -    if (renderpass == state.renderpass && framebuffer == state.framebuffer && +void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) { +    const VkRenderPass renderpass = framebuffer->RenderPass(); +    const VkFramebuffer framebuffer_handle = framebuffer->Handle(); +    const VkExtent2D render_area = framebuffer->RenderArea(); +    if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&          render_area.width == state.render_area.width &&          render_area.height == state.render_area.height) {          return;      } -    const bool 
end_renderpass = state.renderpass != nullptr; +    EndRenderPass();      state.renderpass = renderpass; -    state.framebuffer = framebuffer; +    state.framebuffer = framebuffer_handle;      state.render_area = render_area; -    const VkRenderPassBeginInfo renderpass_bi{ -        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, -        .pNext = nullptr, -        .renderPass = renderpass, -        .framebuffer = framebuffer, -        .renderArea = -            { -                .offset = {.x = 0, .y = 0}, -                .extent = render_area, -            }, -        .clearValueCount = 0, -        .pClearValues = nullptr, -    }; - -    Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { -        if (end_renderpass) { -            cmdbuf.EndRenderPass(); -        } +    Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { +        const VkRenderPassBeginInfo renderpass_bi{ +            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, +            .pNext = nullptr, +            .renderPass = renderpass, +            .framebuffer = framebuffer_handle, +            .renderArea = +                { +                    .offset = {.x = 0, .y = 0}, +                    .extent = render_area, +                }, +            .clearValueCount = 0, +            .pClearValues = nullptr, +        };          cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);      }); +    num_renderpass_images = framebuffer->NumImages(); +    renderpass_images = framebuffer->Images(); +    renderpass_image_ranges = framebuffer->ImageRanges();  }  void VKScheduler::RequestOutsideRenderPassOperationContext() { @@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() {      if (!state.renderpass) {          return;      } +    Record([num_images = num_renderpass_images, images = renderpass_images, +            ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { +        std::array<VkImageMemoryBarrier, 9> barriers; +      
  for (size_t i = 0; i < num_images; ++i) { +            barriers[i] = VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = images[i], +                .subresourceRange = ranges[i], +            }; +        } +        cmdbuf.EndRenderPass(); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | +                                   VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | +                                   VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, +                               VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, +                               vk::Span(barriers.data(), num_images)); +    });      state.renderpass = nullptr; -    Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); +    num_renderpass_images = 0;  }  void VKScheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6d3a5da0b..0a36c8fad 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -17,6 +17,7 @@  namespace Vulkan {  
class CommandPool; +class Framebuffer;  class MasterSemaphore;  class StateTracker;  class VKDevice; @@ -52,8 +53,7 @@ public:      void DispatchWork();      /// Requests to begin a renderpass. -    void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, -                           VkExtent2D render_area); +    void RequestRenderpass(const Framebuffer* framebuffer);      /// Requests the current executino context to be able to execute operations only allowed outside      /// of a renderpass. @@ -62,6 +62,9 @@ public:      /// Binds a pipeline to the current execution context.      void BindGraphicsPipeline(VkPipeline pipeline); +    /// Invalidates current command buffer state except for render passes +    void InvalidateState(); +      /// Assigns the query cache.      void SetQueryCache(VKQueryCache& query_cache_) {          query_cache = &query_cache_; @@ -170,8 +173,6 @@ private:      void AllocateNewContext(); -    void InvalidateState(); -      void EndPendingOperations();      void EndRenderPass(); @@ -192,6 +193,11 @@ private:      std::thread worker_thread;      State state; + +    u32 num_renderpass_images = 0; +    std::array<VkImage, 9> renderpass_images{}; +    std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; +      Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;      Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;      std::mutex mutex; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 72954d0e3..09d6f9f35 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -102,7 +102,7 @@ struct GenericVaryingDescription {      bool is_scalar = false;  }; -spv::Dim GetSamplerDim(const Sampler& sampler) { +spv::Dim GetSamplerDim(const SamplerEntry& sampler) {      ASSERT(!sampler.is_buffer);      switch (sampler.type) {      case 
Tegra::Shader::TextureType::Texture1D: @@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) {      }  } -std::pair<spv::Dim, bool> GetImageDim(const Image& image) { +std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {      switch (image.type) {      case Tegra::Shader::ImageType::Texture1D:          return {spv::Dim::Dim1D, false}; @@ -980,7 +980,7 @@ private:          return binding;      } -    void DeclareImage(const Image& image, u32& binding) { +    void DeclareImage(const ImageEntry& image, u32& binding) {          const auto [dim, arrayed] = GetImageDim(image);          constexpr int depth = 0;          constexpr bool ms = false; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index df1812514..ad91ad5de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -21,10 +21,10 @@ class VKDevice;  namespace Vulkan {  using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::Sampler; -using SamplerEntry = VideoCommon::Shader::Sampler; -using StorageTexelEntry = VideoCommon::Shader::Image; -using ImageEntry = VideoCommon::Shader::Image; +using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using StorageTexelEntry = VideoCommon::Shader::ImageEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry;  constexpr u32 DESCRIPTOR_SET = 0; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index c1a218d76..38a0be7f2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -13,18 +13,13 @@  namespace Vulkan { -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { -    // Avoid undefined behavior by copying to a staging 
allocation -    ASSERT(code_size % sizeof(u32) == 0); -    const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); -    std::memcpy(data.get(), code_data, code_size); - +vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) {      return device.GetLogical().CreateShaderModule({          .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,          .pNext = nullptr,          .flags = 0, -        .codeSize = code_size, -        .pCode = data.get(), +        .codeSize = static_cast<u32>(code.size_bytes()), +        .pCode = code.data(),      });  } diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index d1d3f3cae..dce34a140 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -4,6 +4,8 @@  #pragma once +#include <span> +  #include "common/common_types.h"  #include "video_core/renderer_vulkan/wrapper.h" @@ -11,6 +13,6 @@ namespace Vulkan {  class VKDevice; -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); +vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code);  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 50164cc08..1779a2e30 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  
#include <algorithm> +#include <array>  #include <cstddef>  #include <iterator> @@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table;  using Flags = Maxwell3D::DirtyState::Flags;  Flags MakeInvalidationFlags() { +    static constexpr std::array INVALIDATION_FLAGS{ +        Viewports,         Scissors,  DepthBias,         BlendConstants,    DepthBounds, +        StencilProperties, CullMode,  DepthBoundsEnable, DepthTestEnable,   DepthWriteEnable, +        DepthCompareOp,    FrontFace, StencilOp,         StencilTestEnable, +    };      Flags flags{}; -    flags[Viewports] = true; -    flags[Scissors] = true; -    flags[DepthBias] = true; -    flags[BlendConstants] = true; -    flags[DepthBounds] = true; -    flags[StencilProperties] = true; -    flags[CullMode] = true; -    flags[DepthBoundsEnable] = true; -    flags[DepthTestEnable] = true; -    flags[DepthWriteEnable] = true; -    flags[DepthCompareOp] = true; -    flags[FrontFace] = true; -    flags[StencilOp] = true; -    flags[StencilTestEnable] = true; +    for (const int flag : INVALIDATION_FLAGS) { +        flags[flag] = true; +    }      return flags;  } diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1de789e57..c335d2bdf 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -52,6 +52,14 @@ public:          current_topology = INVALID_TOPOLOGY;      } +    void InvalidateViewports() { +        flags[Dirty::Viewports] = true; +    } + +    void InvalidateScissors() { +        flags[Dirty::Scissors] = true; +    } +      bool TouchViewports() {          return Exchange(Dirty::Viewports, false);      } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 1b59612b9..419cb154d 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ 
b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -19,6 +19,10 @@ namespace Vulkan {  namespace { +constexpr VkBufferUsageFlags BUFFER_USAGE = +    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | +    VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; +  constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;  constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; @@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,  } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, -                               VkBufferUsageFlags usage) +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_)      : device{device_}, scheduler{scheduler_} { -    CreateBuffers(usage); +    CreateBuffers();      ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);      ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);  }  VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { +std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {      ASSERT(size <= stream_buffer_size);      mapped_size = size; @@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {      WaitPendingOperations(offset); -    bool invalidated = false;      if (offset + size > stream_buffer_size) {          // The buffer would overflow, save the amount of used watches and reset the state.          invalidation_mark = current_watch_cursor; @@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {          // Ensure that we don't wait for uncommitted fences.          
scheduler.Flush(); - -        invalidated = true;      } -    return {memory.Map(offset, size), offset, invalidated}; +    return std::make_pair(memory.Map(offset, size), offset);  }  void VKStreamBuffer::Unmap(u64 size) { @@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) {      watch.tick = scheduler.CurrentTick();  } -void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { +void VKStreamBuffer::CreateBuffers() {      const auto memory_properties = device.GetPhysical().GetMemoryProperties();      const u32 preferred_type = GetMemoryType(memory_properties);      const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; @@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {          .pNext = nullptr,          .flags = 0,          .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), -        .usage = usage, +        .usage = BUFFER_USAGE,          .sharingMode = VK_SHARING_MODE_EXCLUSIVE,          .queueFamilyIndexCount = 0,          .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 5e15ad78f..1428f77bf 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -5,7 +5,7 @@  #pragma once  #include <optional> -#include <tuple> +#include <utility>  #include <vector>  #include "common/common_types.h" @@ -19,17 +19,15 @@ class VKScheduler;  class VKStreamBuffer final {  public: -    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, -                            VkBufferUsageFlags usage); +    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler);      ~VKStreamBuffer();      /**       * Reserves a region of memory from the stream buffer.       * @param size Size to reserve. 
-     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer -     * offset and a boolean that's true when buffer has been invalidated. +     * @returns A pair of a raw memory pointer (with offset added), and the buffer offset       */ -    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); +    std::pair<u8*, u64> Map(u64 size, u64 alignment);      /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.      void Unmap(u64 size); @@ -49,7 +47,7 @@ private:      };      /// Creates Vulkan buffer handles committing the required the required memory. -    void CreateBuffers(VkBufferUsageFlags usage); +    void CreateBuffers();      /// Increases the amount of watches available.      void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae2e3322c..261808391 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -4,614 +4,1103 @@  #include <algorithm>  #include <array> -#include <cstddef> -#include <cstring> -#include <memory> -#include <variant> +#include <span>  #include <vector> -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/morton.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/blit_image.h"  #include "video_core/renderer_vulkan/maxwell_to_vk.h"  #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h"  #include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"  #include "video_core/renderer_vulkan/vk_texture_cache.h"  #include "video_core/renderer_vulkan/wrapper.h" -#include 
"video_core/surface.h"  namespace Vulkan { -using VideoCore::MortonSwizzle; -using VideoCore::MortonSwizzleMode; - +using Tegra::Engines::Fermi2D;  using Tegra::Texture::SwizzleSource; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; +using Tegra::Texture::TextureMipmapFilter; +using VideoCommon::BufferImageCopy; +using VideoCommon::ImageInfo; +using VideoCommon::ImageType; +using VideoCommon::SubresourceRange; +using VideoCore::Surface::IsPixelFormatASTC;  namespace { -VkImageType SurfaceTargetToImage(SurfaceTarget target) { -    switch (target) { -    case SurfaceTarget::Texture1D: -    case SurfaceTarget::Texture1DArray: +constexpr std::array ATTACHMENT_REFERENCES{ +    VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, +    VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { +    if (color == std::array<float, 4>{0, 0, 0, 0}) { +        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; +    } else if (color == std::array<float, 4>{0, 0, 0, 1}) { +        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; +    } else if (color == std::array<float, 4>{1, 1, 1, 1}) { +        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; +    } +    if (color[0] + color[1] + color[2] > 1.35f) { +        // If color elements are brighter than roughly 0.5 average, use white border +        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; +    } else if (color[3] > 0.5f) { +        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; +    } else { +        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; +    } 
+} + +[[nodiscard]] VkImageType ConvertImageType(const ImageType type) { +    switch (type) { +    case ImageType::e1D:          return VK_IMAGE_TYPE_1D; -    case SurfaceTarget::Texture2D: -    case SurfaceTarget::Texture2DArray: -    case SurfaceTarget::TextureCubemap: -    case SurfaceTarget::TextureCubeArray: +    case ImageType::e2D: +    case ImageType::Linear:          return VK_IMAGE_TYPE_2D; -    case SurfaceTarget::Texture3D: +    case ImageType::e3D:          return VK_IMAGE_TYPE_3D; -    case SurfaceTarget::TextureBuffer: -        UNREACHABLE(); -        return {}; +    case ImageType::Buffer: +        break;      } -    UNREACHABLE_MSG("Unknown texture target={}", target); +    UNREACHABLE_MSG("Invalid image type={}", type);      return {};  } -VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { -    if (pixel_format < PixelFormat::MaxColorFormat) { -        return VK_IMAGE_ASPECT_COLOR_BIT; -    } else if (pixel_format < PixelFormat::MaxDepthFormat) { -        return VK_IMAGE_ASPECT_DEPTH_BIT; -    } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { -        return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; -    } else { -        UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); -        return VK_IMAGE_ASPECT_COLOR_BIT; +[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) { +    switch (num_samples) { +    case 1: +        return VK_SAMPLE_COUNT_1_BIT; +    case 2: +        return VK_SAMPLE_COUNT_2_BIT; +    case 4: +        return VK_SAMPLE_COUNT_4_BIT; +    case 8: +        return VK_SAMPLE_COUNT_8_BIT; +    case 16: +        return VK_SAMPLE_COUNT_16_BIT; +    default: +        UNREACHABLE_MSG("Invalid number of samples={}", num_samples); +        return VK_SAMPLE_COUNT_1_BIT;      }  } -VkImageViewType GetImageViewType(SurfaceTarget target) { -    switch (target) { -    case SurfaceTarget::Texture1D: -        return VK_IMAGE_VIEW_TYPE_1D; -    case SurfaceTarget::Texture2D: 
-        return VK_IMAGE_VIEW_TYPE_2D; -    case SurfaceTarget::Texture3D: -        return VK_IMAGE_VIEW_TYPE_3D; -    case SurfaceTarget::Texture1DArray: -        return VK_IMAGE_VIEW_TYPE_1D_ARRAY; -    case SurfaceTarget::Texture2DArray: -        return VK_IMAGE_VIEW_TYPE_2D_ARRAY; -    case SurfaceTarget::TextureCubemap: -        return VK_IMAGE_VIEW_TYPE_CUBE; -    case SurfaceTarget::TextureCubeArray: -        return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; -    case SurfaceTarget::TextureBuffer: -        break; +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { +    const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); +    VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; +    if (info.type == ImageType::e2D && info.resources.layers >= 6 && +        info.size.width == info.size.height) { +        flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;      } -    UNREACHABLE(); -    return {}; -} - -vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, -                        std::size_t host_memory_size) { -    // TODO(Rodrigo): Move texture buffer creation to the buffer cache -    return device.GetLogical().CreateBuffer({ -        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, +    if (info.type == ImageType::e3D) { +        flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; +    } +    VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | +                              VK_IMAGE_USAGE_SAMPLED_BIT; +    if (format_info.attachable) { +        switch (VideoCore::Surface::GetFormatType(info.format)) { +        case VideoCore::Surface::SurfaceType::ColorTexture: +            usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +            break; +        case VideoCore::Surface::SurfaceType::Depth: +        case VideoCore::Surface::SurfaceType::DepthStencil: +            usage |= 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; +            break; +        default: +            UNREACHABLE_MSG("Invalid surface type"); +        } +    } +    if (format_info.storage) { +        usage |= VK_IMAGE_USAGE_STORAGE_BIT; +    } +    const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); +    return VkImageCreateInfo{ +        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,          .pNext = nullptr, -        .flags = 0, -        .size = static_cast<VkDeviceSize>(host_memory_size), -        .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | -                 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | -                 VK_BUFFER_USAGE_TRANSFER_DST_BIT, +        .flags = flags, +        .imageType = ConvertImageType(info.type), +        .format = format_info.format, +        .extent = +            { +                .width = info.size.width >> samples_x, +                .height = info.size.height >> samples_y, +                .depth = info.size.depth, +            }, +        .mipLevels = static_cast<u32>(info.resources.levels), +        .arrayLayers = static_cast<u32>(info.resources.layers), +        .samples = ConvertSampleCount(info.num_samples), +        .tiling = VK_IMAGE_TILING_OPTIMAL, +        .usage = usage,          .sharingMode = VK_SHARING_MODE_EXCLUSIVE,          .queueFamilyIndexCount = 0,          .pQueueFamilyIndices = nullptr, -    }); -} - -VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, -                                                    const SurfaceParams& params, VkBuffer buffer, -                                                    std::size_t host_memory_size) { -    ASSERT(params.IsBuffer()); - -    return { -        .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .buffer = buffer, -        .format = -            MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, 
params.pixel_format).format, -        .offset = 0, -        .range = static_cast<VkDeviceSize>(host_memory_size), +        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,      };  } -VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { -    ASSERT(!params.IsBuffer()); - -    const auto [format, attachable, storage] = -        MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); +[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { +    if (info.type == ImageType::Buffer) { +        return vk::Image{}; +    } +    return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); +} -    VkImageCreateInfo ci{ -        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, +[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { +    if (info.type != ImageType::Buffer) { +        return vk::Buffer{}; +    } +    const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); +    return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ +        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,          .pNext = nullptr,          .flags = 0, -        .imageType = SurfaceTargetToImage(params.target), -        .format = format, -        .extent = {}, -        .mipLevels = params.num_levels, -        .arrayLayers = static_cast<u32>(params.GetNumLayers()), -        .samples = VK_SAMPLE_COUNT_1_BIT, -        .tiling = VK_IMAGE_TILING_OPTIMAL, -        .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | -                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, +        .size = info.size.width * bytes_per_block, +        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | +                 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | +                 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,          .sharingMode = VK_SHARING_MODE_EXCLUSIVE,          .queueFamilyIndexCount = 0,          
.pQueueFamilyIndices = nullptr, -        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, -    }; -    if (attachable) { -        ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT -                                               : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; -    } -    if (storage) { -        ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; -    } - -    switch (params.target) { -    case SurfaceTarget::TextureCubemap: -    case SurfaceTarget::TextureCubeArray: -        ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; -        [[fallthrough]]; -    case SurfaceTarget::Texture1D: -    case SurfaceTarget::Texture1DArray: -    case SurfaceTarget::Texture2D: -    case SurfaceTarget::Texture2DArray: -        ci.extent = {params.width, params.height, 1}; -        break; -    case SurfaceTarget::Texture3D: -        ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; -        ci.extent = {params.width, params.height, params.depth}; -        break; -    case SurfaceTarget::TextureBuffer: -        UNREACHABLE(); -    } - -    return ci; +    });  } -u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, -                  SwizzleSource w_source) { -    return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | -           (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); +[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { +    switch (VideoCore::Surface::GetFormatType(format)) { +    case VideoCore::Surface::SurfaceType::ColorTexture: +        return VK_IMAGE_ASPECT_COLOR_BIT; +    case VideoCore::Surface::SurfaceType::Depth: +        return VK_IMAGE_ASPECT_DEPTH_BIT; +    case VideoCore::Surface::SurfaceType::DepthStencil: +        return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; +    default: +        UNREACHABLE_MSG("Invalid surface type"); +        return VkImageAspectFlags{}; +    }  } -} // Anonymous namespace - 
-CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, -                             VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, -                             GPUVAddr gpu_addr_, const SurfaceParams& params_) -    : SurfaceBase<View>{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_}, -      memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} { -    if (params.IsBuffer()) { -        buffer = CreateBuffer(device, params, host_memory_size); -        commit = memory_manager.Commit(buffer, false); - -        const auto buffer_view_ci = -            GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); -        format = buffer_view_ci.format; - -        buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); -    } else { -        const auto image_ci = GenerateImageCreateInfo(device, params); -        format = image_ci.format; - -        image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); -        commit = memory_manager.Commit(image->GetHandle(), false); +[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) { +    if (info.IsRenderTarget()) { +        return ImageAspectMask(info.format);      } - -    // TODO(Rodrigo): Move this to a virtual function. -    u32 num_layers = 1; -    if (params.is_layered || params.target == SurfaceTarget::Texture3D) { -        num_layers = params.depth; +    const bool is_first = info.Swizzle()[0] == SwizzleSource::R; +    switch (info.format) { +    case PixelFormat::D24_UNORM_S8_UINT: +    case PixelFormat::D32_FLOAT_S8_UINT: +        return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; +    case PixelFormat::S8_UINT_D24_UNORM: +        return is_first ? 
VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; +    case PixelFormat::D16_UNORM: +    case PixelFormat::D32_FLOAT: +        return VK_IMAGE_ASPECT_DEPTH_BIT; +    default: +        return VK_IMAGE_ASPECT_COLOR_BIT;      } -    main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));  } -CachedSurface::~CachedSurface() = default; - -void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { -    // To upload data we have to be outside of a renderpass -    scheduler.RequestOutsideRenderPassOperationContext(); +[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, +                                                            const ImageView* image_view) { +    const auto pixel_format = image_view->format; +    return VkAttachmentDescription{ +        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, +        .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format, +        .samples = image_view->Samples(), +        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, +        .storeOp = VK_ATTACHMENT_STORE_OP_STORE, +        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, +        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, +        .initialLayout = VK_IMAGE_LAYOUT_GENERAL, +        .finalLayout = VK_IMAGE_LAYOUT_GENERAL, +    }; +} -    if (params.IsBuffer()) { -        UploadBuffer(staging_buffer); -    } else { -        UploadImage(staging_buffer); +[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { +    switch (swizzle) { +    case SwizzleSource::Zero: +        return VK_COMPONENT_SWIZZLE_ZERO; +    case SwizzleSource::R: +        return VK_COMPONENT_SWIZZLE_R; +    case SwizzleSource::G: +        return VK_COMPONENT_SWIZZLE_G; +    case SwizzleSource::B: +        return VK_COMPONENT_SWIZZLE_B; +    case SwizzleSource::A: +        return VK_COMPONENT_SWIZZLE_A; +    case SwizzleSource::OneFloat: +    case SwizzleSource::OneInt: +        
return VK_COMPONENT_SWIZZLE_ONE;      } +    UNREACHABLE_MSG("Invalid swizzle={}", swizzle); +    return VK_COMPONENT_SWIZZLE_ZERO;  } -void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { -    UNIMPLEMENTED_IF(params.IsBuffer()); - -    if (params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { -        LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); +[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { +    switch (type) { +    case VideoCommon::ImageViewType::e1D: +        return VK_IMAGE_VIEW_TYPE_1D; +    case VideoCommon::ImageViewType::e2D: +        return VK_IMAGE_VIEW_TYPE_2D; +    case VideoCommon::ImageViewType::Cube: +        return VK_IMAGE_VIEW_TYPE_CUBE; +    case VideoCommon::ImageViewType::e3D: +        return VK_IMAGE_VIEW_TYPE_3D; +    case VideoCommon::ImageViewType::e1DArray: +        return VK_IMAGE_VIEW_TYPE_1D_ARRAY; +    case VideoCommon::ImageViewType::e2DArray: +        return VK_IMAGE_VIEW_TYPE_2D_ARRAY; +    case VideoCommon::ImageViewType::CubeArray: +        return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; +    case VideoCommon::ImageViewType::Rect: +        LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); +        return VK_IMAGE_VIEW_TYPE_2D; +    case VideoCommon::ImageViewType::Buffer: +        UNREACHABLE_MSG("Texture buffers can't be image views"); +        return VK_IMAGE_VIEW_TYPE_1D;      } +    UNREACHABLE_MSG("Invalid image view type={}", type); +    return VK_IMAGE_VIEW_TYPE_2D; +} -    // We can't copy images to buffers inside a renderpass -    scheduler.RequestOutsideRenderPassOperationContext(); +[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers( +    VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) { +    return VkImageSubresourceLayers{ +        .aspectMask = aspect_mask, +        .mipLevel = static_cast<u32>(subresource.base_level), +        .baseArrayLayer = static_cast<u32>(subresource.base_layer), +      
  .layerCount = static_cast<u32>(subresource.num_layers), +    }; +} -    FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, -                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); +[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) { +    return VkOffset3D{ +        .x = offset3d.x, +        .y = offset3d.y, +        .z = offset3d.z, +    }; +} -    const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); -    // TODO(Rodrigo): Do this in a single copy -    for (u32 level = 0; level < params.num_levels; ++level) { -        scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, -                          copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { -            cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); -        }); -    } -    scheduler.Finish(); +[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) { +    return VkExtent3D{ +        .width = static_cast<u32>(extent3d.width), +        .height = static_cast<u32>(extent3d.height), +        .depth = static_cast<u32>(extent3d.depth), +    }; +} -    // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. 
-    std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), -                host_memory_size); +[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy, +                                        VkImageAspectFlags aspect_mask) noexcept { +    return VkImageCopy{ +        .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask), +        .srcOffset = MakeOffset3D(copy.src_offset), +        .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask), +        .dstOffset = MakeOffset3D(copy.dst_offset), +        .extent = MakeExtent3D(copy.extent), +    };  } -void CachedSurface::DecorateSurfaceName() { -    // TODO(Rodrigo): Add name decorations +[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( +    std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { +    std::vector<VkBufferCopy> result(copies.size()); +    std::ranges::transform( +        copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { +            return VkBufferCopy{ +                .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset), +                .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset), +                .size = static_cast<VkDeviceSize>(copy.size), +            }; +        }); +    return result;  } -View CachedSurface::CreateView(const ViewParams& view_params) { -    // TODO(Rodrigo): Add name decorations -    return views[view_params] = std::make_shared<CachedSurfaceView>(device, *this, view_params); +[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( +    std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { +    struct Maker { +        VkBufferImageCopy operator()(const BufferImageCopy& copy) const { +            return VkBufferImageCopy{ +                .bufferOffset = copy.buffer_offset + buffer_offset, +                .bufferRowLength = 
copy.buffer_row_length, +                .bufferImageHeight = copy.buffer_image_height, +                .imageSubresource = +                    { +                        .aspectMask = aspect_mask, +                        .mipLevel = static_cast<u32>(copy.image_subresource.base_level), +                        .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer), +                        .layerCount = static_cast<u32>(copy.image_subresource.num_layers), +                    }, +                .imageOffset = +                    { +                        .x = copy.image_offset.x, +                        .y = copy.image_offset.y, +                        .z = copy.image_offset.z, +                    }, +                .imageExtent = +                    { +                        .width = copy.image_extent.width, +                        .height = copy.image_extent.height, +                        .depth = copy.image_extent.depth, +                    }, +            }; +        } +        size_t buffer_offset; +        VkImageAspectFlags aspect_mask; +    }; +    if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { +        std::vector<VkBufferImageCopy> result(copies.size() * 2); +        std::ranges::transform(copies, result.begin(), +                               Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); +        std::ranges::transform(copies, result.begin() + copies.size(), +                               Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); +        return result; +    } else { +        std::vector<VkBufferImageCopy> result(copies.size()); +        std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); +        return result; +    }  } -void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { -    const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); -    std::memcpy(src_buffer.commit->Map(host_memory_size), 
staging_buffer.data(), host_memory_size); +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask, +                                                           const SubresourceRange& range) { +    return VkImageSubresourceRange{ +        .aspectMask = aspect_mask, +        .baseMipLevel = static_cast<u32>(range.base.level), +        .levelCount = static_cast<u32>(range.extent.levels), +        .baseArrayLayer = static_cast<u32>(range.base.layer), +        .layerCount = static_cast<u32>(range.extent.layers), +    }; +} -    scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, -                      size = host_memory_size](vk::CommandBuffer cmdbuf) { -        VkBufferCopy copy; -        copy.srcOffset = 0; -        copy.dstOffset = 0; -        copy.size = size; -        cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { +    SubresourceRange range = image_view->range; +    if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { +        // Slice image views always affect a single layer, but their subresource range corresponds +        // to the slice. Override the value to affect a single layer. 
+        range.base.layer = 0; +        range.extent.layers = 1; +    } +    return MakeSubresourceRange(ImageAspectMask(image_view->format), range); +} -        VkBufferMemoryBarrier barrier; -        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; -        barrier.pNext = nullptr; -        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; -        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; -        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway -        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; -        barrier.buffer = dst_buffer; -        barrier.offset = 0; -        barrier.size = size; -        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, -                               0, {}, barrier, {}); -    }); +[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { +    return VkImageSubresourceLayers{ +        .aspectMask = ImageAspectMask(image_view->format), +        .mipLevel = static_cast<u32>(image_view->range.base.level), +        .baseArrayLayer = static_cast<u32>(image_view->range.base.layer), +        .layerCount = static_cast<u32>(image_view->range.extent.layers), +    };  } -void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { -    const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); -    std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); - -    FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, -                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - -    for (u32 level = 0; level < params.num_levels; ++level) { -        const VkBufferImageCopy copy = GetBufferImageCopy(level); -        if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { -            scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), -                       
       copy](vk::CommandBuffer cmdbuf) { -                std::array<VkBufferImageCopy, 2> copies = {copy, copy}; -                copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; -                copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; -                cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, -                                         copies); -            }); -        } else { -            scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), -                              copy](vk::CommandBuffer cmdbuf) { -                cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); -            }); -        } +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { +    switch (value) { +    case SwizzleSource::G: +        return SwizzleSource::R; +    default: +        return value;      }  } -VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { -    return { -        .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), -        .bufferRowLength = 0, -        .bufferImageHeight = 0, -        .imageSubresource = +void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, +                       VkImageAspectFlags aspect_mask, bool is_initialized, +                       std::span<const VkBufferImageCopy> copies) { +    static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | +                                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; +    const VkImageMemoryBarrier read_barrier{ +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +        .pNext = nullptr, +        .srcAccessMask = ACCESS_FLAGS, +        .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +        .oldLayout = is_initialized ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, +        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +        .image = image, +        .subresourceRange =              { -                .aspectMask = image->GetAspectMask(), -                .mipLevel = level, +                .aspectMask = aspect_mask, +                .baseMipLevel = 0, +                .levelCount = VK_REMAINING_MIP_LEVELS,                  .baseArrayLayer = 0, -                .layerCount = static_cast<u32>(params.GetNumLayers()), +                .layerCount = VK_REMAINING_ARRAY_LAYERS,              }, -        .imageOffset = {.x = 0, .y = 0, .z = 0}, -        .imageExtent = +    }; +    const VkImageMemoryBarrier write_barrier{ +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +        .pNext = nullptr, +        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +        .dstAccessMask = ACCESS_FLAGS, +        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +        .newLayout = VK_IMAGE_LAYOUT_GENERAL, +        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +        .image = image, +        .subresourceRange =              { -                .width = params.GetMipWidth(level), -                .height = params.GetMipHeight(level), -                .depth = params.target == SurfaceTarget::Texture3D ? 
params.GetMipDepth(level) : 1U, +                .aspectMask = aspect_mask, +                .baseMipLevel = 0, +                .levelCount = VK_REMAINING_MIP_LEVELS, +                .baseArrayLayer = 0, +                .layerCount = VK_REMAINING_ARRAY_LAYERS,              },      }; +    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, +                           read_barrier); +    cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); +    // TODO: Move this to another API +    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, +                           write_barrier);  } -VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { -    return {image->GetAspectMask(), 0, params.num_levels, 0, -            static_cast<u32>(params.GetNumLayers())}; +[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region, +                                        const std::array<Offset2D, 2>& src_region, +                                        const VkImageSubresourceLayers& dst_layers, +                                        const VkImageSubresourceLayers& src_layers) { +    return VkImageBlit{ +        .srcSubresource = src_layers, +        .srcOffsets = +            { +                { +                    .x = src_region[0].x, +                    .y = src_region[0].y, +                    .z = 0, +                }, +                { +                    .x = src_region[1].x, +                    .y = src_region[1].y, +                    .z = 1, +                }, +            }, +        .dstSubresource = dst_layers, +        .dstOffsets = +            { +                { +                    .x = dst_region[0].x, +                    .y = dst_region[0].y, +                    .z = 0, +                }, +                { +                    .x = dst_region[1].x, +                    .y = 
dst_region[1].y, +                    .z = 1, +                }, +            }, +    };  } -CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, -                                     const ViewParams& view_params_) -    : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, -      image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, -      aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, -      base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, -      image_view_type{image ? GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { -    if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { -        base_layer = 0; -        num_layers = 1; -        base_slice = view_params_.base_layer; -        num_slices = view_params_.num_layers; -    } else { -        base_layer = view_params_.base_layer; -        num_layers = view_params_.num_layers; -    } +[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region, +                                              const std::array<Offset2D, 2>& src_region, +                                              const VkImageSubresourceLayers& dst_layers, +                                              const VkImageSubresourceLayers& src_layers) { +    return VkImageResolve{ +        .srcSubresource = src_layers, +        .srcOffset = +            { +                .x = src_region[0].x, +                .y = src_region[0].y, +                .z = 0, +            }, +        .dstSubresource = dst_layers, +        .dstOffset = +            { +                .x = dst_region[0].x, +                .y = dst_region[0].y, +                .z = 0, +            }, +        .extent = +            { +                .width = static_cast<u32>(dst_region[1].x - dst_region[0].x), +                .height = static_cast<u32>(dst_region[1].y - dst_region[0].y), +                .depth = 1, +           
 }, +    };  } -CachedSurfaceView::~CachedSurfaceView() = default; - -VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, -                                            SwizzleSource z_source, SwizzleSource w_source) { -    const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); -    if (last_image_view && last_swizzle == new_swizzle) { -        return last_image_view; +struct RangedBarrierRange { +    u32 min_mip = std::numeric_limits<u32>::max(); +    u32 max_mip = std::numeric_limits<u32>::min(); +    u32 min_layer = std::numeric_limits<u32>::max(); +    u32 max_layer = std::numeric_limits<u32>::min(); + +    void AddLayers(const VkImageSubresourceLayers& layers) { +        min_mip = std::min(min_mip, layers.mipLevel); +        max_mip = std::max(max_mip, layers.mipLevel + 1); +        min_layer = std::min(min_layer, layers.baseArrayLayer); +        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);      } -    last_swizzle = new_swizzle; -    const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); -    auto& image_view = entry->second; -    if (!is_cache_miss) { -        return last_image_view = *image_view; +    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { +        return VkImageSubresourceRange{ +            .aspectMask = aspect_mask, +            .baseMipLevel = min_mip, +            .levelCount = max_mip - min_mip, +            .baseArrayLayer = min_layer, +            .layerCount = max_layer - min_layer, +        };      } +}; -    std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), -                       MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; -    if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { -        // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. 
-        std::swap(swizzle[0], swizzle[2]); -    } +} // Anonymous namespace -    // Games can sample depth or stencil values on textures. This is decided by the swizzle value on -    // hardware. To emulate this on Vulkan we specify it in the aspect. -    VkImageAspectFlags aspect = aspect_mask; -    if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { -        UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); -        const bool is_first = x_source == SwizzleSource::R; -        switch (surface_params.pixel_format) { -        case PixelFormat::D24_UNORM_S8_UINT: -        case PixelFormat::D32_FLOAT_S8_UINT: -            aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; -            break; -        case PixelFormat::S8_UINT_D24_UNORM: -            aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; -            break; -        default: -            aspect = VK_IMAGE_ASPECT_DEPTH_BIT; -            UNIMPLEMENTED(); -        } +void TextureCacheRuntime::Finish() { +    scheduler.Finish(); +} -        // Make sure we sample the first component -        std::transform( -            swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { -                return component == VK_COMPONENT_SWIZZLE_G ? 
VK_COMPONENT_SWIZZLE_R : component; -            }); -    } +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { +    const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); +    return ImageBufferMap{ +        .handle = *buffer.handle, +        .map = buffer.commit->Map(size), +    }; +} -    if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { -        ASSERT(base_slice == 0); -        ASSERT(num_slices == surface_params.depth); +void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, +                                    const std::array<Offset2D, 2>& dst_region, +                                    const std::array<Offset2D, 2>& src_region, +                                    Tegra::Engines::Fermi2D::Filter filter, +                                    Tegra::Engines::Fermi2D::Operation operation) { +    const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); +    const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT; +    const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT; +    ASSERT(aspect_mask == ImageAspectMask(dst.format)); +    if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { +        blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, +                                    operation); +        return;      } - -    image_view = device.GetLogical().CreateImageView({ -        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .image = surface.GetImageHandle(), -        .viewType = image_view_type, -        .format = surface.GetImage().GetFormat(), -        .components = -            { -                .r = swizzle[0], -                .g = swizzle[1], -                .b = swizzle[2], -                .a = swizzle[3], +    if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { +        if 
(!device.IsBlitDepthStencilSupported()) { +            UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); +            blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), +                                               dst_region, src_region, filter, operation); +            return; +        } +    } +    ASSERT(src.ImageFormat() == dst.ImageFormat()); +    ASSERT(!(is_dst_msaa && !is_src_msaa)); +    ASSERT(operation == Fermi2D::Operation::SrcCopy); + +    const VkImage dst_image = dst.ImageHandle(); +    const VkImage src_image = src.ImageHandle(); +    const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst); +    const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src); +    const bool is_resolve = is_src_msaa && !is_dst_msaa; +    scheduler.RequestOutsideRenderPassOperationContext(); +    scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, +                      aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { +        const std::array read_barriers{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = src_image, +                .subresourceRange{ +                    .aspectMask = aspect_mask, +                    .baseMipLevel = 0, +                    .levelCount = VK_REMAINING_MIP_LEVELS, +                    .baseArrayLayer = 0, +          
          .layerCount = VK_REMAINING_ARRAY_LAYERS, +                },              }, -        .subresourceRange = -            { -                .aspectMask = aspect, -                .baseMipLevel = base_level, -                .levelCount = num_levels, -                .baseArrayLayer = base_layer, -                .layerCount = num_layers, +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = dst_image, +                .subresourceRange{ +                    .aspectMask = aspect_mask, +                    .baseMipLevel = 0, +                    .levelCount = VK_REMAINING_MIP_LEVELS, +                    .baseArrayLayer = 0, +                    .layerCount = VK_REMAINING_ARRAY_LAYERS, +                }, +            }, +        }; +        VkImageMemoryBarrier write_barrier{ +            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | +                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                             VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +            .newLayout = 
VK_IMAGE_LAYOUT_GENERAL, +            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .image = dst_image, +            .subresourceRange{ +                .aspectMask = aspect_mask, +                .baseMipLevel = 0, +                .levelCount = VK_REMAINING_MIP_LEVELS, +                .baseArrayLayer = 0, +                .layerCount = VK_REMAINING_ARRAY_LAYERS,              }, +        }; +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, nullptr, nullptr, read_barriers); +        if (is_resolve) { +            cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, +                                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                                MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); +        } else { +            const bool is_linear = filter == Fermi2D::Filter::Bilinear; +            const VkFilter vk_filter = is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; +            cmdbuf.BlitImage( +                src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter); +        } +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                               0, write_barrier);      }); - -    return last_image_view = *image_view;  } -VkImageView CachedSurfaceView::GetAttachment() { -    if (render_target) { -        return *render_target; +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { +    switch (dst_view.format) { +    case PixelFormat::R16_UNORM: +        if (src_view.format == PixelFormat::D16_UNORM) { +            return blit_image_helper.ConvertD16ToR16(dst, src_view); +        } +        break; +    case PixelFormat::R32_FLOAT: +        if (src_view.format == PixelFormat::D32_FLOAT) { +            return blit_image_helper.ConvertD32ToR32(dst, src_view); +        } +        break; +    case PixelFormat::D16_UNORM: +        if (src_view.format == PixelFormat::R16_UNORM) { +            return blit_image_helper.ConvertR16ToD16(dst, src_view); +        } +        break; +    case PixelFormat::D32_FLOAT: +        if (src_view.format == PixelFormat::R32_FLOAT) { +            return blit_image_helper.ConvertR32ToD32(dst, src_view); +        } +        break; +    default: +        break;      } +    UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); +} -    VkImageViewCreateInfo ci{ -        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -        .pNext = nullptr, -        .flags = 0, -        .image = surface.GetImageHandle(), -        .viewType = VK_IMAGE_VIEW_TYPE_1D, -        .format = surface.GetImage().GetFormat(), -        .components = -            { -                .r = VK_COMPONENT_SWIZZLE_IDENTITY, -         
       .g = VK_COMPONENT_SWIZZLE_IDENTITY, -                .b = VK_COMPONENT_SWIZZLE_IDENTITY, -                .a = VK_COMPONENT_SWIZZLE_IDENTITY, +void TextureCacheRuntime::CopyImage(Image& dst, Image& src, +                                    std::span<const VideoCommon::ImageCopy> copies) { +    std::vector<VkImageCopy> vk_copies(copies.size()); +    const VkImageAspectFlags aspect_mask = dst.AspectMask(); +    ASSERT(aspect_mask == src.AspectMask()); + +    std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { +        return MakeImageCopy(copy, aspect_mask); +    }); +    const VkImage dst_image = dst.Handle(); +    const VkImage src_image = src.Handle(); +    scheduler.RequestOutsideRenderPassOperationContext(); +    scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { +        RangedBarrierRange dst_range; +        RangedBarrierRange src_range; +        for (const VkImageCopy& copy : vk_copies) { +            dst_range.AddLayers(copy.dstSubresource); +            src_range.AddLayers(copy.srcSubresource); +        } +        const std::array read_barriers{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = src_image, +                .subresourceRange = src_range.SubresourceRange(aspect_mask),              }, -        .subresourceRange = -  
          { -                .aspectMask = aspect_mask, -                .baseMipLevel = base_level, -                .levelCount = num_levels, -                .baseArrayLayer = 0, -                .layerCount = 0, +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = dst_image, +                .subresourceRange = dst_range.SubresourceRange(aspect_mask),              }, -    }; -    if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { -        ci.viewType = num_slices > 1 ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; -        ci.subresourceRange.baseArrayLayer = base_slice; -        ci.subresourceRange.layerCount = num_slices; +        }; +        const VkImageMemoryBarrier write_barrier{ +            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | +                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | +                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                             VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +            .newLayout = VK_IMAGE_LAYOUT_GENERAL, +            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +            .image = dst_image, +            .subresourceRange = dst_range.SubresourceRange(aspect_mask), +        }; +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, {}, {}, read_barriers); +        cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, +                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                               0, write_barrier); +    }); +} + +Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, +             VAddr cpu_addr_) +    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, +      image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), +      
aspect_mask(ImageAspectMask(info.format)) { +    if (image) { +        commit = runtime.memory_manager.Commit(image, false);      } else { -        ci.viewType = image_view_type; -        ci.subresourceRange.baseArrayLayer = base_layer; -        ci.subresourceRange.layerCount = num_layers; +        commit = runtime.memory_manager.Commit(buffer, false); +    } +    if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { +        flags |= VideoCommon::ImageFlagBits::Converted; +    } +    if (runtime.device.HasDebuggingToolAttached()) { +        if (image) { +            image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); +        } else { +            buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); +        }      } -    render_target = device.GetLogical().CreateImageView(ci); -    return *render_target;  } -VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, -                               Tegra::Engines::Maxwell3D& maxwell3d_, -                               Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, -                               VKMemoryManager& memory_manager_, VKScheduler& scheduler_, -                               VKStagingBufferPool& staging_pool_) -    : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), -      device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ -                                                                                   staging_pool_} {} - -VKTextureCache::~VKTextureCache() = default; - -Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { -    return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, -                                           gpu_addr, params); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                         std::span<const BufferImageCopy> copies) { +    // 
TODO: Move this to another API +    scheduler->RequestOutsideRenderPassOperationContext(); +    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); +    const VkBuffer src_buffer = map.handle; +    const VkImage vk_image = *image; +    const VkImageAspectFlags vk_aspect_mask = aspect_mask; +    const bool is_initialized = std::exchange(initialized, true); +    scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, +                       vk_copies](vk::CommandBuffer cmdbuf) { +        CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); +    });  } -void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, -                               const VideoCommon::CopyParams& copy_params) { -    const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; -    const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; -    UNIMPLEMENTED_IF(src_3d); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                         std::span<const VideoCommon::BufferCopy> copies) { +    // TODO: Move this to another API +    scheduler->RequestOutsideRenderPassOperationContext(); +    std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); +    const VkBuffer src_buffer = map.handle; +    const VkBuffer dst_buffer = *buffer; +    scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { +        // TODO: Barriers +        cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); +    }); +} -    // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and -    // dimension respectively. -    const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; -    const u32 dst_offset_z = dst_3d ? 
copy_params.dest_z : 0; +void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, +                           std::span<const BufferImageCopy> copies) { +    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); +    scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, +                       vk_copies](vk::CommandBuffer cmdbuf) { +        // TODO: Barriers +        cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); +    }); +} -    const u32 extent_z = dst_3d ? copy_params.depth : 1; -    const u32 num_layers = dst_3d ? 1 : copy_params.depth; +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, +                     ImageId image_id_, Image& image) +    : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, +      image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( +                                                                         image.info.num_samples)} { +    const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); +    std::array<SwizzleSource, 4> swizzle{ +        SwizzleSource::R, +        SwizzleSource::G, +        SwizzleSource::B, +        SwizzleSource::A, +    }; +    if (!info.IsRenderTarget()) { +        swizzle = info.Swizzle(); +        if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { +            std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); +        } +    } +    const VkFormat vk_format = +        MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format; +    const VkImageViewCreateInfo create_info{ +        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .image = image.Handle(), +        .viewType = VkImageViewType{}, +        .format = vk_format, +        .components{ +            .r = 
ComponentSwizzle(swizzle[0]), +            .g = ComponentSwizzle(swizzle[1]), +            .b = ComponentSwizzle(swizzle[2]), +            .a = ComponentSwizzle(swizzle[3]), +        }, +        .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), +    }; +    const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { +        VkImageViewCreateInfo ci{create_info}; +        ci.viewType = ImageViewType(view_type); +        if (num_layers) { +            ci.subresourceRange.layerCount = *num_layers; +        } +        vk::ImageView handle = device->GetLogical().CreateImageView(ci); +        if (device->HasDebuggingToolAttached()) { +            handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); +        } +        image_views[static_cast<size_t>(view_type)] = std::move(handle); +    }; +    switch (info.type) { +    case VideoCommon::ImageViewType::e1D: +    case VideoCommon::ImageViewType::e1DArray: +        create(VideoCommon::ImageViewType::e1D, 1); +        create(VideoCommon::ImageViewType::e1DArray, std::nullopt); +        render_target = Handle(VideoCommon::ImageViewType::e1DArray); +        break; +    case VideoCommon::ImageViewType::e2D: +    case VideoCommon::ImageViewType::e2DArray: +        create(VideoCommon::ImageViewType::e2D, 1); +        create(VideoCommon::ImageViewType::e2DArray, std::nullopt); +        render_target = Handle(VideoCommon::ImageViewType::e2DArray); +        break; +    case VideoCommon::ImageViewType::e3D: +        create(VideoCommon::ImageViewType::e3D, std::nullopt); +        render_target = Handle(VideoCommon::ImageViewType::e3D); +        break; +    case VideoCommon::ImageViewType::Cube: +    case VideoCommon::ImageViewType::CubeArray: +        create(VideoCommon::ImageViewType::Cube, 6); +        create(VideoCommon::ImageViewType::CubeArray, std::nullopt); +        break; +    case VideoCommon::ImageViewType::Rect: +        UNIMPLEMENTED(); +        break; 
+    case VideoCommon::ImageViewType::Buffer: +        buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ +            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, +            .pNext = nullptr, +            .flags = 0, +            .buffer = image.Buffer(), +            .format = vk_format, +            .offset = 0, // TODO: Redesign buffer cache to support this +            .range = image.guest_size_bytes, +        }); +        break; +    } +} -    // We can't copy inside a renderpass -    scheduler.RequestOutsideRenderPassOperationContext(); +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) +    : VideoCommon::ImageViewBase{params} {} -    src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, -                            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, -                            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); -    dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, -                            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, -                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); +VkImageView ImageView::DepthView() { +    if (depth_view) { +        return *depth_view; +    } +    depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); +    return *depth_view; +} -    const VkImageCopy copy{ -        .srcSubresource = -            { -                .aspectMask = src_surface->GetAspectMask(), -                .mipLevel = copy_params.source_level, -                .baseArrayLayer = copy_params.source_z, -                .layerCount = num_layers, -            }, -        .srcOffset = -            { -                .x = static_cast<s32>(copy_params.source_x), -                .y = static_cast<s32>(copy_params.source_y), -                .z = 0, -            }, -        .dstSubresource = -            { -                .aspectMask = 
dst_surface->GetAspectMask(), -                .mipLevel = copy_params.dest_level, -                .baseArrayLayer = dst_base_layer, -                .layerCount = num_layers, -            }, -        .dstOffset = -            { -                .x = static_cast<s32>(copy_params.dest_x), -                .y = static_cast<s32>(copy_params.dest_y), -                .z = static_cast<s32>(dst_offset_z), -            }, -        .extent = -            { -                .width = copy_params.width, -                .height = copy_params.height, -                .depth = extent_z, -            }, -    }; +VkImageView ImageView::StencilView() { +    if (stencil_view) { +        return *stencil_view; +    } +    stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); +    return *stencil_view; +} -    const VkImage src_image = src_surface->GetImageHandle(); -    const VkImage dst_image = dst_surface->GetImageHandle(); -    scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { -        cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, -                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); +vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +    return device->GetLogical().CreateImageView({ +        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .image = image_handle, +        .viewType = ImageViewType(type), +        .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format, +        .components{ +            .r = VK_COMPONENT_SWIZZLE_IDENTITY, +            .g = VK_COMPONENT_SWIZZLE_IDENTITY, +            .b = VK_COMPONENT_SWIZZLE_IDENTITY, +            .a = VK_COMPONENT_SWIZZLE_IDENTITY, +        }, +        .subresourceRange = MakeSubresourceRange(aspect_mask, range),      });  } -void VKTextureCache::ImageBlit(View& src_view, View& dst_view, -                               const 
Tegra::Engines::Fermi2D::Config& copy_config) { -    // We can't blit inside a renderpass -    scheduler.RequestOutsideRenderPassOperationContext(); - -    src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, -                         VK_ACCESS_TRANSFER_READ_BIT); -    dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, -                         VK_ACCESS_TRANSFER_WRITE_BIT); - -    VkImageBlit blit; -    blit.srcSubresource = src_view->GetImageSubresourceLayers(); -    blit.srcOffsets[0].x = copy_config.src_rect.left; -    blit.srcOffsets[0].y = copy_config.src_rect.top; -    blit.srcOffsets[0].z = 0; -    blit.srcOffsets[1].x = copy_config.src_rect.right; -    blit.srcOffsets[1].y = copy_config.src_rect.bottom; -    blit.srcOffsets[1].z = 1; -    blit.dstSubresource = dst_view->GetImageSubresourceLayers(); -    blit.dstOffsets[0].x = copy_config.dst_rect.left; -    blit.dstOffsets[0].y = copy_config.dst_rect.top; -    blit.dstOffsets[0].z = 0; -    blit.dstOffsets[1].x = copy_config.dst_rect.right; -    blit.dstOffsets[1].y = copy_config.dst_rect.bottom; -    blit.dstOffsets[1].z = 1; - -    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - -    scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, -                      is_linear](vk::CommandBuffer cmdbuf) { -        cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, -                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, -                         is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); +Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { +    const auto& device = runtime.device; +    const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported(); +    const std::array<float, 4> color = tsc.BorderColor(); +    // C++20 bit_cast +    VkClearColorValue border_color; +    std::memcpy(&border_color, &color, sizeof(color)); +    const VkSamplerCustomBorderColorCreateInfoEXT border_ci{ +        .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, +        .pNext = nullptr, +        .customBorderColor = border_color, +        .format = VK_FORMAT_UNDEFINED, +    }; +    const void* pnext = nullptr; +    if (arbitrary_borders) { +        pnext = &border_ci; +    } +    const VkSamplerReductionModeCreateInfoEXT reduction_ci{ +        .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, +        .pNext = pnext, +        .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter), +    }; +    if (runtime.device.IsExtSamplerFilterMinmaxSupported()) { +        pnext = &reduction_ci; +    } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) { +        LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); +    } +    // Some games have samplers with garbage. Sanitize them here. 
+    const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); +    sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ +        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, +        .pNext = pnext, +        .flags = 0, +        .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), +        .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), +        .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), +        .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), +        .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), +        .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), +        .mipLodBias = tsc.LodBias(), +        .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), +        .maxAnisotropy = max_anisotropy, +        .compareEnable = tsc.depth_compare_enabled, +        .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), +        .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), +        .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), +        .borderColor = +            arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), +        .unnormalizedCoordinates = VK_FALSE,      });  } -void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { -    // Currently unimplemented. PBO copies should be dropped and we should use a render pass to -    // convert from color to depth and viceversa. 
-    LOG_WARNING(Render_Vulkan, "Unimplemented"); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, +                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { +    std::vector<VkAttachmentDescription> descriptions; +    std::vector<VkImageView> attachments; +    RenderPassKey renderpass_key{}; +    s32 num_layers = 1; + +    for (size_t index = 0; index < NUM_RT; ++index) { +        const ImageView* const color_buffer = color_buffers[index]; +        if (!color_buffer) { +            renderpass_key.color_formats[index] = PixelFormat::Invalid; +            continue; +        } +        descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); +        attachments.push_back(color_buffer->RenderTarget()); +        renderpass_key.color_formats[index] = color_buffer->format; +        num_layers = std::max(num_layers, color_buffer->range.extent.layers); +        images[num_images] = color_buffer->ImageHandle(); +        image_ranges[num_images] = MakeSubresourceRange(color_buffer); +        samples = color_buffer->Samples(); +        ++num_images; +    } +    const size_t num_colors = attachments.size(); +    const VkAttachmentReference* depth_attachment = +        depth_buffer ? 
&ATTACHMENT_REFERENCES[num_colors] : nullptr; +    if (depth_buffer) { +        descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); +        attachments.push_back(depth_buffer->RenderTarget()); +        renderpass_key.depth_format = depth_buffer->format; +        num_layers = std::max(num_layers, depth_buffer->range.extent.layers); +        images[num_images] = depth_buffer->ImageHandle(); +        image_ranges[num_images] = MakeSubresourceRange(depth_buffer); +        samples = depth_buffer->Samples(); +        ++num_images; +    } else { +        renderpass_key.depth_format = PixelFormat::Invalid; +    } +    renderpass_key.samples = samples; + +    const auto& device = runtime.device.GetLogical(); +    const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); +    if (is_new) { +        const VkSubpassDescription subpass{ +            .flags = 0, +            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, +            .inputAttachmentCount = 0, +            .pInputAttachments = nullptr, +            .colorAttachmentCount = static_cast<u32>(num_colors), +            .pColorAttachments = num_colors != 0 ? 
ATTACHMENT_REFERENCES.data() : nullptr, +            .pResolveAttachments = nullptr, +            .pDepthStencilAttachment = depth_attachment, +            .preserveAttachmentCount = 0, +            .pPreserveAttachments = nullptr, +        }; +        cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ +            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, +            .pNext = nullptr, +            .flags = 0, +            .attachmentCount = static_cast<u32>(descriptions.size()), +            .pAttachments = descriptions.data(), +            .subpassCount = 1, +            .pSubpasses = &subpass, +            .dependencyCount = 0, +            .pDependencies = nullptr, +        }); +    } +    renderpass = *cache_pair->second; +    render_area = VkExtent2D{ +        .width = key.size.width, +        .height = key.size.height, +    }; +    num_color_buffers = static_cast<u32>(num_colors); +    framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ +        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .renderPass = renderpass, +        .attachmentCount = static_cast<u32>(attachments.size()), +        .pAttachments = attachments.data(), +        .width = key.size.width, +        .height = key.size.height, +        .layers = static_cast<u32>(num_layers), +    }); +    if (runtime.device.HasDebuggingToolAttached()) { +        framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); +    }  }  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b0be4cb0f..edc3d80c0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -4,217 +4,265 @@  #pragma once -#include <memory> -#include <unordered_map> +#include <compare> +#include <span> -#include "common/common_types.h" -#include 
"video_core/renderer_vulkan/vk_image.h"  #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_scheduler.h"  #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/texture_cache/surface_base.h"  #include "video_core/texture_cache/texture_cache.h" -namespace VideoCore { -class RasterizerInterface; -} -  namespace Vulkan { -class RasterizerVulkan; +using VideoCommon::ImageId; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +using VideoCore::Surface::PixelFormat; +  class VKDevice;  class VKScheduler;  class VKStagingBufferPool; -class CachedSurfaceView; -class CachedSurface; +class BlitImageHelper; +class Image; +class ImageView; +class Framebuffer; -using Surface = std::shared_ptr<CachedSurface>; -using View = std::shared_ptr<CachedSurfaceView>; -using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; +struct RenderPassKey { +    constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; +    std::array<PixelFormat, NUM_RT> color_formats; +    PixelFormat depth_format; +    VkSampleCountFlagBits samples; +}; -class CachedSurface final : public VideoCommon::SurfaceBase<View> { -    friend CachedSurfaceView; +} // namespace Vulkan -public: -    explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, -                           VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, -                           GPUVAddr gpu_addr_, const SurfaceParams& params_); -    ~CachedSurface(); +namespace std { +template <> +struct hash<Vulkan::RenderPassKey> { +    [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { +        size_t value = static_cast<size_t>(key.depth_format) << 48; +        value ^= static_cast<size_t>(key.samples) << 52; +        for (size_t i = 0; i < key.color_formats.size(); ++i) { +         
   value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); +        } +        return value; +    } +}; +} // namespace std -    void UploadTexture(const std::vector<u8>& staging_buffer) override; -    void DownloadTexture(std::vector<u8>& staging_buffer) override; +namespace Vulkan { -    void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, -                        VkImageLayout new_layout) { -        image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, -                          new_stage_mask, new_access, new_layout); +struct ImageBufferMap { +    [[nodiscard]] VkBuffer Handle() const noexcept { +        return handle;      } -    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, -                    VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, -                    VkImageLayout new_layout) { -        image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, -                          new_access, new_layout); +    [[nodiscard]] std::span<u8> Span() const noexcept { +        return map.Span();      } -    VKImage& GetImage() { -        return *image; -    } +    VkBuffer handle; +    MemoryMap map; +}; -    const VKImage& GetImage() const { -        return *image; -    } +struct TextureCacheRuntime { +    const VKDevice& device; +    VKScheduler& scheduler; +    VKMemoryManager& memory_manager; +    VKStagingBufferPool& staging_buffer_pool; +    BlitImageHelper& blit_image_helper; +    std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache; + +    void Finish(); -    VkImage GetImageHandle() const { -        return *image->GetHandle(); +    [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); + +    [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { +        // TODO: Have a special function for this +        return MapUploadBuffer(size);      } -    VkImageAspectFlags GetAspectMask() const { -     
   return image->GetAspectMask(); +    void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, +                   const std::array<Offset2D, 2>& dst_region, +                   const std::array<Offset2D, 2>& src_region, +                   Tegra::Engines::Fermi2D::Filter filter, +                   Tegra::Engines::Fermi2D::Operation operation); + +    void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + +    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); + +    [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { +        return false;      } -    VkBufferView GetBufferViewHandle() const { -        return *buffer_view; +    void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, +                               std::span<const VideoCommon::SwizzleParameters>) { +        UNREACHABLE();      } -protected: -    void DecorateSurfaceName() override; +    void InsertUploadMemoryBarrier() {} +}; -    View CreateView(const ViewParams& view_params) override; +class Image : public VideoCommon::ImageBase { +public: +    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, +                   VAddr cpu_addr); -private: -    void UploadBuffer(const std::vector<u8>& staging_buffer); +    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                      std::span<const VideoCommon::BufferImageCopy> copies); -    void UploadImage(const std::vector<u8>& staging_buffer); +    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +                      std::span<const VideoCommon::BufferCopy> copies); -    VkBufferImageCopy GetBufferImageCopy(u32 level) const; +    void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, +                        std::span<const VideoCommon::BufferImageCopy> copies); -    VkImageSubresourceRange GetImageSubresourceRange() const; +    [[nodiscard]] 
VkImage Handle() const noexcept { +        return *image; +    } -    const VKDevice& device; -    VKMemoryManager& memory_manager; -    VKScheduler& scheduler; -    VKStagingBufferPool& staging_pool; +    [[nodiscard]] VkBuffer Buffer() const noexcept { +        return *buffer; +    } + +    [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { +        return aspect_mask; +    } -    std::optional<VKImage> image; +private: +    VKScheduler* scheduler; +    vk::Image image;      vk::Buffer buffer; -    vk::BufferView buffer_view;      VKMemoryCommit commit; - -    VkFormat format = VK_FORMAT_UNDEFINED; +    VkImageAspectFlags aspect_mask = 0; +    bool initialized = false;  }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class ImageView : public VideoCommon::ImageViewBase {  public: -    explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, -                               const ViewParams& view_params_); -    ~CachedSurfaceView(); +    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); +    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); -    VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, -                             Tegra::Texture::SwizzleSource y_source, -                             Tegra::Texture::SwizzleSource z_source, -                             Tegra::Texture::SwizzleSource w_source); +    [[nodiscard]] VkImageView DepthView(); -    VkImageView GetAttachment(); +    [[nodiscard]] VkImageView StencilView(); -    bool IsSameSurface(const CachedSurfaceView& rhs) const { -        return &surface == &rhs.surface; +    [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { +        return *image_views[static_cast<size_t>(query_type)];      } -    u32 GetWidth() const { -        return surface_params.GetMipWidth(base_level); +    [[nodiscard]] VkBufferView BufferView() const noexcept { 
+        return *buffer_view;      } -    u32 GetHeight() const { -        return surface_params.GetMipHeight(base_level); +    [[nodiscard]] VkImage ImageHandle() const noexcept { +        return image_handle;      } -    u32 GetNumLayers() const { -        return num_layers; +    [[nodiscard]] VkImageView RenderTarget() const noexcept { +        return render_target;      } -    bool IsBufferView() const { -        return buffer_view; +    [[nodiscard]] PixelFormat ImageFormat() const noexcept { +        return image_format;      } -    VkImage GetImage() const { -        return image; +    [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { +        return samples;      } -    VkBufferView GetBufferView() const { -        return buffer_view; -    } +private: +    [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); -    VkImageSubresourceRange GetImageSubresourceRange() const { -        return {aspect_mask, base_level, num_levels, base_layer, num_layers}; -    } +    const VKDevice* device = nullptr; +    std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; +    vk::ImageView depth_view; +    vk::ImageView stencil_view; +    vk::BufferView buffer_view; +    VkImage image_handle = VK_NULL_HANDLE; +    VkImageView render_target = VK_NULL_HANDLE; +    PixelFormat image_format = PixelFormat::Invalid; +    VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; +}; -    VkImageSubresourceLayers GetImageSubresourceLayers() const { -        return {surface.GetAspectMask(), base_level, base_layer, num_layers}; -    } +class ImageAlloc : public VideoCommon::ImageAllocBase {}; -    void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, -                    VkAccessFlags new_access) const { -        surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, -                           new_access, new_layout); -    } +class Sampler { +public: +    explicit 
Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); -    void MarkAsModified(u64 tick) { -        surface.MarkAsModified(true, tick); +    [[nodiscard]] VkSampler Handle() const noexcept { +        return *sampler;      }  private: -    // Store a copy of these values to avoid double dereference when reading them -    const SurfaceParams surface_params; -    const VkImage image; -    const VkBufferView buffer_view; -    const VkImageAspectFlags aspect_mask; - -    const VKDevice& device; -    CachedSurface& surface; -    const u32 base_level; -    const u32 num_levels; -    const VkImageViewType image_view_type; -    u32 base_layer = 0; -    u32 num_layers = 0; -    u32 base_slice = 0; -    u32 num_slices = 0; - -    VkImageView last_image_view = nullptr; -    u32 last_swizzle = 0; - -    vk::ImageView render_target; -    std::unordered_map<u32, vk::ImageView> view_cache; +    vk::Sampler sampler;  }; -class VKTextureCache final : public TextureCacheBase { +class Framebuffer {  public: -    explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, -                            Tegra::Engines::Maxwell3D& maxwell3d_, -                            Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, -                            VKMemoryManager& memory_manager_, VKScheduler& scheduler_, -                            VKStagingBufferPool& staging_pool_); -    ~VKTextureCache(); +    explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, +                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key); -private: -    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; +    [[nodiscard]] VkFramebuffer Handle() const noexcept { +        return *framebuffer; +    } -    void ImageCopy(Surface& src_surface, Surface& dst_surface, -                   const VideoCommon::CopyParams& copy_params) override; +    [[nodiscard]] VkRenderPass RenderPass() const noexcept { +  
      return renderpass; +    } -    void ImageBlit(View& src_view, View& dst_view, -                   const Tegra::Engines::Fermi2D::Config& copy_config) override; +    [[nodiscard]] VkExtent2D RenderArea() const noexcept { +        return render_area; +    } -    void BufferCopy(Surface& src_surface, Surface& dst_surface) override; +    [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { +        return samples; +    } -    const VKDevice& device; -    VKMemoryManager& memory_manager; -    VKScheduler& scheduler; -    VKStagingBufferPool& staging_pool; +    [[nodiscard]] u32 NumColorBuffers() const noexcept { +        return num_color_buffers; +    } + +    [[nodiscard]] u32 NumImages() const noexcept { +        return num_images; +    } + +    [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept { +        return images; +    } + +    [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept { +        return image_ranges; +    } + +private: +    vk::Framebuffer framebuffer; +    VkRenderPass renderpass{}; +    VkExtent2D render_area{}; +    VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; +    u32 num_color_buffers = 0; +    u32 num_images = 0; +    std::array<VkImage, 9> images{}; +    std::array<VkImageSubresourceRange, 9> image_ranges{}; +}; + +struct TextureCacheParams { +    static constexpr bool ENABLE_VALIDATION = true; +    static constexpr bool FRAMEBUFFER_BLITS = false; +    static constexpr bool HAS_EMULATED_COPIES = false; + +    using Runtime = Vulkan::TextureCacheRuntime; +    using Image = Vulkan::Image; +    using ImageAlloc = Vulkan::ImageAlloc; +    using ImageView = Vulkan::ImageView; +    using Sampler = Vulkan::Sampler; +    using Framebuffer = Vulkan::Framebuffer;  }; +using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; +  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h 
b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f7e3c9821..f098a8540 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -40,30 +40,34 @@ public:      void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); -    void AddSampledImage(VkSampler sampler, VkImageView image_view) { -        payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); +    void AddSampledImage(VkImageView image_view, VkSampler sampler) { +        payload.emplace_back(VkDescriptorImageInfo{ +            .sampler = sampler, +            .imageView = image_view, +            .imageLayout = VK_IMAGE_LAYOUT_GENERAL, +        });      }      void AddImage(VkImageView image_view) { -        payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); +        payload.emplace_back(VkDescriptorImageInfo{ +            .sampler = VK_NULL_HANDLE, +            .imageView = image_view, +            .imageLayout = VK_IMAGE_LAYOUT_GENERAL, +        });      } -    void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { -        payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); +    void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { +        payload.emplace_back(VkDescriptorBufferInfo{ +            .buffer = buffer, +            .offset = offset, +            .range = size, +        });      }      void AddTexelBuffer(VkBufferView texel_buffer) {          payload.emplace_back(texel_buffer);      } -    VkImageLayout* LastImageLayout() { -        return &payload.back().image.imageLayout; -    } - -    const VkImageLayout* LastImageLayout() const { -        return &payload.back().image.imageLayout; -    } -  private:      const VKDevice& device;      VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 1eced809e..2a21e850d 100644 --- 
a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {      X(vkCmdBeginQuery);      X(vkCmdBeginRenderPass);      X(vkCmdBeginTransformFeedbackEXT); +    X(vkCmdBeginDebugUtilsLabelEXT);      X(vkCmdBindDescriptorSets);      X(vkCmdBindIndexBuffer);      X(vkCmdBindPipeline); @@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {      X(vkCmdEndQuery);      X(vkCmdEndRenderPass);      X(vkCmdEndTransformFeedbackEXT); +    X(vkCmdEndDebugUtilsLabelEXT);      X(vkCmdFillBuffer);      X(vkCmdPipelineBarrier);      X(vkCmdPushConstants); @@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {      X(vkCmdSetPrimitiveTopologyEXT);      X(vkCmdSetStencilOpEXT);      X(vkCmdSetStencilTestEnableEXT); +    X(vkCmdResolveImage);      X(vkCreateBuffer);      X(vkCreateBufferView);      X(vkCreateCommandPool); @@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {      X(vkQueueSubmit);      X(vkResetFences);      X(vkResetQueryPoolEXT); +    X(vkSetDebugUtilsObjectNameEXT); +    X(vkSetDebugUtilsObjectTagEXT);      X(vkUnmapMemory);      X(vkUpdateDescriptorSetWithTemplateKHR);      X(vkUpdateDescriptorSets); @@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {  #undef X  } +template <typename T> +void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, +                   const char* name) { +    const VkDebugUtilsObjectNameInfoEXT name_info{ +        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, +        .pNext = nullptr, +        .objectType = type, +        .objectHandle = reinterpret_cast<u64>(handle), +        .pObjectName = name, +    }; +    Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); +} +  } // Anonymous namespace  bool Load(InstanceDispatch& dld) noexcept { @@ -476,8 +494,7
@@ DebugCallback Instance::TryCreateDebugCallback(                             VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |                             VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,          .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | -                       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | -                       VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, +                       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,          .pfnUserCallback = callback,          .pUserData = nullptr,      }; @@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {      Check(dld->vkBindBufferMemory(owner, handle, memory, offset));  } +void Buffer::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +} + +void BufferView::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); +} +  void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {      Check(dld->vkBindImageMemory(owner, handle, memory, offset));  } +void Image::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); +} + +void ImageView::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); +} + +void DeviceMemory::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); +} + +void Fence::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); +} + +void Framebuffer::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); +} +  DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {      const std::size_t num = ai.descriptorSetCount;      std::unique_ptr 
sets = std::make_unique<VkDescriptorSet[]>(num); @@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c      }  } +void DescriptorPool::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); +} +  CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {      const VkCommandBufferAllocateInfo ai{          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, @@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev      }  } +void CommandPool::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); +} +  std::vector<VkImage> SwapchainKHR::GetImages() const {      u32 num;      Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); @@ -538,6 +591,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {      return images;  } +void Event::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); +} + +void ShaderModule::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); +} + +void Semaphore::SetObjectNameEXT(const char* name) const { +    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); +} +  Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,                        Span<const char*> enabled_extensions, const void* next,                        DeviceDispatch& dispatch) noexcept { diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 76f790eab..f9a184e00 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -9,6 +9,7 @@  #include <limits>  #include <memory>  #include <optional> +#include <span>  #include <type_traits> 
 #include <utility>  #include <vector> @@ -18,6 +19,10 @@  #include "common/common_types.h" +#ifdef _MSC_VER +#pragma warning(disable : 26812) // Disable prefer enum class over enum +#endif +  namespace Vulkan::vk {  /** @@ -41,6 +46,9 @@ public:      /// Construct an empty span.      constexpr Span() noexcept = default; +    /// Construct an empty span +    constexpr Span(std::nullptr_t) noexcept {} +      /// Construct a span from a single element.      constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} @@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch {      PFN_vkCmdBeginQuery vkCmdBeginQuery;      PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;      PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; +    PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT;      PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;      PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;      PFN_vkCmdBindPipeline vkCmdBindPipeline; @@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch {      PFN_vkCmdEndQuery vkCmdEndQuery;      PFN_vkCmdEndRenderPass vkCmdEndRenderPass;      PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; +    PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT;      PFN_vkCmdFillBuffer vkCmdFillBuffer;      PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;      PFN_vkCmdPushConstants vkCmdPushConstants; @@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch {      PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;      PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;      PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; +    PFN_vkCmdResolveImage vkCmdResolveImage;      PFN_vkCreateBuffer vkCreateBuffer;      PFN_vkCreateBufferView vkCreateBufferView;      PFN_vkCreateCommandPool vkCreateCommandPool; @@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch {      PFN_vkQueueSubmit vkQueueSubmit;      PFN_vkResetFences 
vkResetFences;      PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; +    PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; +    PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT;      PFN_vkUnmapMemory vkUnmapMemory;      PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;      PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; @@ -542,18 +555,14 @@ private:      const DeviceDispatch* dld = nullptr;  }; -using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>;  using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;  using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;  using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; -using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>; -using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>;  using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;  using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;  using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;  using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;  using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; -using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;  using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;  using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; @@ -605,6 +614,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {  public:      /// Attaches a memory allocation.      void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +}; + +class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { +    using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; + +public: +    /// Set object name. 
+    void SetObjectNameEXT(const char* name) const;  };  class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { @@ -613,12 +633,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {  public:      /// Attaches a memory allocation.      void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +}; + +class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { +    using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; + +public: +    /// Set object name. +    void SetObjectNameEXT(const char* name) const;  };  class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {      using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;  public: +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +      u8* Map(VkDeviceSize offset, VkDeviceSize size) const {          void* data;          Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); @@ -634,6 +668,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> {      using Handle<VkFence, VkDevice, DeviceDispatch>::Handle;  public: +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +      VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {          return dld->vkWaitForFences(owner, 1, &handle, true, timeout);      } @@ -647,11 +684,22 @@ public:      }  }; +class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> { +    using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle; + +public: +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +}; +  class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {      using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle;  public:      DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; + +    /// Set object name. 
+    void SetObjectNameEXT(const char* name) const;  };  class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { @@ -660,6 +708,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {  public:      CommandBuffers Allocate(std::size_t num_buffers,                              VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; + +    /// Set object name. +    void SetObjectNameEXT(const char* name) const;  };  class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { @@ -673,15 +724,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {      using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;  public: +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +      VkResult GetStatus() const noexcept {          return dld->vkGetEventStatus(owner, handle);      }  }; +class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> { +    using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle; + +public: +    /// Set object name. +    void SetObjectNameEXT(const char* name) const; +}; +  class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {      using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;  public: +    /// Set object name. 
+    void SetObjectNameEXT(const char* name) const; +      [[nodiscard]] u64 GetCounter() const {          u64 value;          Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); @@ -932,6 +997,12 @@ public:                              regions.data(), filter);      } +    void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, +                      VkImageLayout dst_layout, Span<VkImageResolve> regions) { +        dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), +                               regions.data()); +    } +      void Dispatch(u32 x, u32 y, u32 z) const noexcept {          dld->vkCmdDispatch(handle, x, y, z);      } @@ -946,6 +1017,23 @@ public:                                    image_barriers.size(), image_barriers.data());      } +    void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, +                         VkDependencyFlags dependency_flags = 0) const noexcept { +        PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); +    } + +    void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, +                         VkDependencyFlags dependency_flags, +                         const VkBufferMemoryBarrier& buffer_barrier) const noexcept { +        PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); +    } + +    void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, +                         VkDependencyFlags dependency_flags, +                         const VkImageMemoryBarrier& image_barrier) const noexcept { +        PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); +    } +      void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,                             Span<VkBufferImageCopy> regions) 
const noexcept {          dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), @@ -979,6 +1067,13 @@ public:          dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);      } +    template <typename T> +    void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, +                       const T& data) const noexcept { +        static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable"); +        dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data); +    } +      void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {          dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());      } @@ -1088,6 +1183,20 @@ public:                                            counter_buffers, counter_buffer_offsets);      } +    void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { +        const VkDebugUtilsLabelEXT label_info{ +            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, +            .pNext = nullptr, +            .pLabelName = label, +            .color{color[0], color[1], color[2], color[3]}, +        }; +        dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); +    } + +    void EndDebugUtilsLabelEXT() const noexcept { +        dld->vkCmdEndDebugUtilsLabelEXT(handle); +    } +  private:      VkCommandBuffer handle;      const DeviceDispatch* dld; diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp deleted file mode 100644 index 53c7ef12d..000000000 --- a/src/video_core/sampler_cache.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/sampler_cache.h" - -namespace VideoCommon { - -std::size_t SamplerCacheKey::Hash() const { -    static_assert(sizeof(raw) % sizeof(u64) == 0); -    return static_cast<std::size_t>( -        Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { -    return raw == rhs.raw; -} - -} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h deleted file mode 100644 index cbe3ad071..000000000 --- a/src/video_core/sampler_cache.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> -#include <unordered_map> - -#include "video_core/textures/texture.h" - -namespace VideoCommon { - -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { -    std::size_t Hash() const; - -    bool operator==(const SamplerCacheKey& rhs) const; - -    bool operator!=(const SamplerCacheKey& rhs) const { -        return !operator==(rhs); -    } -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash<VideoCommon::SamplerCacheKey> { -    std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { -        return k.Hash(); -    } -}; - -} // namespace std - -namespace VideoCommon { - -template <typename SamplerType, typename SamplerStorageType> -class SamplerCache { -public: -    SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { -        const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); -        auto& sampler = entry->second; -        if (is_cache_miss) { -            sampler = CreateSampler(tsc); -        } -        return ToSamplerType(sampler); -    } - -protected: -    virtual SamplerStorageType CreateSampler(const 
Tegra::Texture::TSCEntry& tsc) const = 0; - -    virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; - -private: -    std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; -}; - -} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 78245473c..09f93463b 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,                                       const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,                                       Vulkan::VKDescriptorPool& descriptor_pool,                                       Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, -                                     Vulkan::VKRenderPassCache& renderpass_cache,                                       std::vector<VkDescriptorSetLayoutBinding> bindings,                                       Vulkan::SPIRVProgram program, -                                     Vulkan::GraphicsPipelineCacheKey key) { +                                     Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {      std::unique_lock lock(queue_mutex);      pending_queue.push({          .backend = Backend::Vulkan, @@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,          .scheduler = &scheduler,          .descriptor_pool = &descriptor_pool,          .update_descriptor_queue = &update_descriptor_queue, -        .renderpass_cache = &renderpass_cache,          .bindings = std::move(bindings),          .program = std::move(program),          .key = key, +        .num_color_buffers = num_color_buffers,      });      cv.notify_one();  } @@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context          } else if (work.backend == Backend::Vulkan) {              auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(                  *work.vk_device, *work.scheduler, *work.descriptor_pool, -                *work.update_descriptor_queue, *work.renderpass_cache, 
work.key, work.bindings, -                work.program); +                *work.update_descriptor_queue, work.key, work.bindings, work.program, +                work.num_color_buffers);              work.pp_cache->EmplacePipeline(std::move(pipeline));          } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 5a7216019..004e214a8 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -98,9 +98,9 @@ public:                             Vulkan::VKScheduler& scheduler,                             Vulkan::VKDescriptorPool& descriptor_pool,                             Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, -                           Vulkan::VKRenderPassCache& renderpass_cache,                             std::vector<VkDescriptorSetLayoutBinding> bindings, -                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); +                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, +                           u32 num_color_buffers);  private:      void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -127,10 +127,10 @@ private:          Vulkan::VKScheduler* scheduler;          Vulkan::VKDescriptorPool* descriptor_pool;          Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; -        Vulkan::VKRenderPassCache* renderpass_cache;          std::vector<VkDescriptorSetLayoutBinding> bindings;          Vulkan::SPIRVProgram program;          Vulkan::GraphicsPipelineCacheKey key; +        u32 num_color_buffers;      };      std::condition_variable cv; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index ab14c1aa3..6576d1208 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -25,7 +25,7 @@ using Tegra::Shader::OpCode;  namespace {  void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, -      
                        const std::list<Sampler>& used_samplers) { +                              const std::list<SamplerEntry>& used_samplers) {      if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {          return;      } @@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,      }  } -std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, +std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,                                          VideoCore::GuestDriverProfile& gpu_driver, -                                        const std::list<Sampler>& used_samplers) { +                                        const std::list<SamplerEntry>& used_samplers) {      const u32 base_offset = sampler_to_deduce.offset;      u32 max_offset{std::numeric_limits<u32>::max()};      for (const auto& sampler : used_samplers) { diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 532f66d27..5470e8cf4 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {      return pc;  } -Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {      const auto offset = static_cast<u32>(image.index.Value()); -    const auto it = std::find_if(std::begin(used_images), std::end(used_images), -                                 [offset](const Image& entry) { return entry.offset == offset; }); +    const auto it = +        std::find_if(std::begin(used_images), std::end(used_images), +                     [offset](const ImageEntry& entry) { return entry.offset == offset; });      if (it != std::end(used_images)) {          ASSERT(!it->is_bindless && it->type == type);          return *it; @@ -511,7 +512,7 @@ Image& 
ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t      return used_images.emplace_back(next_index, offset, type);  } -Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {      const Node image_register = GetRegister(reg);      const auto result =          TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); @@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im      const auto offset = std::get<2>(result);      const auto it = std::find_if(std::begin(used_images), std::end(used_images), -                                 [buffer, offset](const Image& entry) { +                                 [buffer, offset](const ImageEntry& entry) {                                       return entry.buffer == buffer && entry.offset == offset;                                   });      if (it != std::end(used_images)) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index fb18f631f..833fa2a39 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          SamplerInfo info;          info.is_shadow = is_depth_compare; -        const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); +        const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);          Node4 values;          for (u32 element = 0; element < values.size(); ++element) { @@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          SamplerInfo info;          info.type = texture_type;          info.is_array = is_array; -        const std::optional<Sampler> sampler = is_bindless -                                                   ? 
GetBindlessSampler(base_reg, info, index_var) -                                                   : GetSampler(instr.sampler, info); +        const std::optional<SamplerEntry> sampler = +            is_bindless ? GetBindlessSampler(base_reg, info, index_var) +                        : GetSampler(instr.sampler, info);          Node4 values;          if (!sampler) {              std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); @@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          [[fallthrough]];      case OpCode::Id::TXQ: {          Node index_var; -        const std::optional<Sampler> sampler = is_bindless -                                                   ? GetBindlessSampler(instr.gpr8, {}, index_var) -                                                   : GetSampler(instr.sampler, {}); +        const std::optional<SamplerEntry> sampler = +            is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) +                        : GetSampler(instr.sampler, {});          if (!sampler) {              u32 indexer = 0; @@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {          info.type = texture_type;          info.is_array = is_array;          Node index_var; -        const std::optional<Sampler> sampler = +        const std::optional<SamplerEntry> sampler =              is_bindless ? 
GetBindlessSampler(instr.gpr20, info, index_var)                          : GetSampler(instr.sampler, info); @@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(      return info;  } -std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, -                                            SamplerInfo sampler_info) { +std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, +                                                 SamplerInfo sampler_info) {      const u32 offset = static_cast<u32>(sampler.index.Value());      const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));      // If this sampler has already been used, return the existing mapping. -    const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), -                                 [offset](const Sampler& entry) { return entry.offset == offset; }); +    const auto it = +        std::find_if(used_samplers.begin(), used_samplers.end(), +                     [offset](const SamplerEntry& entry) { return entry.offset == offset; });      if (it != used_samplers.end()) {          ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&                 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -399,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,                                        *info.is_shadow, *info.is_buffer, false);  } -std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, -                                                    Node& index_var) { +std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +                                                         SamplerInfo info, Node& index_var) {      const Node sampler_register = GetRegister(reg);      const auto [base_node, tracked_sampler_info] =          
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); @@ -416,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,          // If this sampler has already been used, return the existing mapping.          const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), -                                     [buffer, offset](const Sampler& entry) { +                                     [buffer, offset](const SamplerEntry& entry) {                                           return entry.buffer == buffer && entry.offset == offset;                                       });          if (it != used_samplers.end()) { @@ -436,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,          info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));          // Try to use an already created sampler if it exists -        const auto it = std::find_if( -            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { -                return offsets == std::pair{entry.offset, entry.secondary_offset} && -                       indices == std::pair{entry.buffer, entry.secondary_buffer}; -            }); +        const auto it = +            std::find_if(used_samplers.begin(), used_samplers.end(), +                         [indices, offsets](const SamplerEntry& entry) { +                             return offsets == std::pair{entry.offset, entry.secondary_offset} && +                                    indices == std::pair{entry.buffer, entry.secondary_buffer}; +                         });          if (it != used_samplers.end()) {              ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&                     it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -460,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register 
reg,          // If this sampler has already been used, return the existing mapping.          const auto it = std::find_if(              used_samplers.begin(), used_samplers.end(), -            [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); +            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });          if (it != used_samplers.end()) {              ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&                     it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && @@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,      info.is_buffer = false;      Node index_var; -    const std::optional<Sampler> sampler = is_bindless -                                               ? GetBindlessSampler(*bindless_reg, info, index_var) -                                               : GetSampler(instr.sampler, info); +    const std::optional<SamplerEntry> sampler = +        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) +                    : GetSampler(instr.sampler, info);      if (!sampler) {          return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};      } @@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de      info.is_shadow = depth_compare;      Node index_var; -    const std::optional<Sampler> sampler = +    const std::optional<SamplerEntry> sampler =          is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)                      : GetSampler(instr.sampler, info);      Node4 values; @@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {      // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};      // const Node multisample{is_multisample ? 
GetRegister(gpr20_cursor++) : nullptr}; -    const std::optional<Sampler> sampler = GetSampler(instr.sampler, {}); +    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});      Node4 values;      for (u32 element = 0; element < values.size(); ++element) { @@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is      info.type = texture_type;      info.is_array = is_array;      info.is_shadow = false; -    const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); +    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);      const std::size_t type_coord_count = GetCoordCount(texture_type);      const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8db9e1de7..b54d33763 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -282,25 +282,24 @@ struct SeparateSamplerNode;  using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;  using TrackSampler = std::shared_ptr<TrackSamplerData>; -struct Sampler { +struct SamplerEntry {      /// Bound samplers constructor -    constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, -                               bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) +    explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, +                          bool is_shadow_, bool is_buffer_, bool is_indexed_)          : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},            is_buffer{is_buffer_}, is_indexed{is_indexed_} {}      /// Separate sampler constructor -    constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets_, -                               std::pair<u32, u32> buffers_, Tegra::Shader::TextureType type_, -    
                           bool is_array_, bool is_shadow_, bool is_buffer_) -        : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, -          buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, -          is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} +    explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, +                          Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, +                          bool is_buffer_) +        : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, +          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, +          is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}      /// Bindless samplers constructor -    constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, -                               Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, -                               bool is_buffer_, bool is_indexed_) +    explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, +                          bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)          : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},            is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {      } @@ -340,14 +339,14 @@ struct BindlessSamplerNode {      u32 offset;  }; -struct Image { +struct ImageEntry {  public:      /// Bound images constructor -    constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) +    explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)          : index{index_}, offset{offset_}, type{type_} {}      /// Bindless samplers constructor -    constexpr explicit Image(u32 index_, u32 offset_, u32 
buffer_, Tegra::Shader::ImageType type_) +    explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)          : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}      void MarkWrite() { @@ -391,7 +390,7 @@ struct MetaArithmetic {  /// Parameters describing a texture sampler  struct MetaTexture { -    Sampler sampler; +    SamplerEntry sampler;      Node array;      Node depth_compare;      std::vector<Node> aoffi; @@ -405,7 +404,7 @@ struct MetaTexture {  };  struct MetaImage { -    const Image& image; +    const ImageEntry& image;      std::vector<Node> values;      u32 element{};  }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6aae14e34..0c6ab0f07 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -94,11 +94,11 @@ public:          return used_cbufs;      } -    const std::list<Sampler>& GetSamplers() const { +    const std::list<SamplerEntry>& GetSamplers() const {          return used_samplers;      } -    const std::list<Image>& GetImages() const { +    const std::list<ImageEntry>& GetImages() const {          return used_images;      } @@ -334,17 +334,17 @@ private:                                 std::optional<Tegra::Engines::SamplerDescriptor> sampler);      /// Accesses a texture sampler. -    std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); +    std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);      /// Accesses a texture sampler for a bindless texture. -    std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, -                                              Node& index_var); +    std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, +                                                   Node& index_var);      /// Accesses an image. 
-    Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); +    ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);      /// Access a bindless image sampler. -    Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); +    ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);      /// Extracts a sequence of bits from a node      Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -454,8 +454,8 @@ private:      std::set<Tegra::Shader::Attribute::Index> used_input_attributes;      std::set<Tegra::Shader::Attribute::Index> used_output_attributes;      std::map<u32, ConstBuffer> used_cbufs; -    std::list<Sampler> used_samplers; -    std::list<Image> used_images; +    std::list<SamplerEntry> used_samplers; +    std::list<ImageEntry> used_images;      std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};      std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;      bool uses_layer{}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 937e29d1e..6308aef94 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) {  }  std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { -    return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; +    return {DefaultBlockWidth(format), DefaultBlockHeight(format)};  }  } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index cfd12fa61..c40ab89d0 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -120,7 +120,7 @@ enum class PixelFormat {      Max = MaxDepthStencilFormat,      Invalid = 255,  }; -static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); +constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);  enum class 
SurfaceType {      ColorTexture = 0, @@ -140,117 +140,7 @@ enum class SurfaceTarget {      TextureCubeArray,  }; -constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ -    0, // A8B8G8R8_UNORM -    0, // A8B8G8R8_SNORM -    0, // A8B8G8R8_SINT -    0, // A8B8G8R8_UINT -    0, // R5G6B5_UNORM -    0, // B5G6R5_UNORM -    0, // A1R5G5B5_UNORM -    0, // A2B10G10R10_UNORM -    0, // A2B10G10R10_UINT -    0, // A1B5G5R5_UNORM -    0, // R8_UNORM -    0, // R8_SNORM -    0, // R8_SINT -    0, // R8_UINT -    0, // R16G16B16A16_FLOAT -    0, // R16G16B16A16_UNORM -    0, // R16G16B16A16_SNORM -    0, // R16G16B16A16_SINT -    0, // R16G16B16A16_UINT -    0, // B10G11R11_FLOAT -    0, // R32G32B32A32_UINT -    2, // BC1_RGBA_UNORM -    2, // BC2_UNORM -    2, // BC3_UNORM -    2, // BC4_UNORM -    2, // BC4_SNORM -    2, // BC5_UNORM -    2, // BC5_SNORM -    2, // BC7_UNORM -    2, // BC6H_UFLOAT -    2, // BC6H_SFLOAT -    2, // ASTC_2D_4X4_UNORM -    0, // B8G8R8A8_UNORM -    0, // R32G32B32A32_FLOAT -    0, // R32G32B32A32_SINT -    0, // R32G32_FLOAT -    0, // R32G32_SINT -    0, // R32_FLOAT -    0, // R16_FLOAT -    0, // R16_UNORM -    0, // R16_SNORM -    0, // R16_UINT -    0, // R16_SINT -    0, // R16G16_UNORM -    0, // R16G16_FLOAT -    0, // R16G16_UINT -    0, // R16G16_SINT -    0, // R16G16_SNORM -    0, // R32G32B32_FLOAT -    0, // A8B8G8R8_SRGB -    0, // R8G8_UNORM -    0, // R8G8_SNORM -    0, // R8G8_SINT -    0, // R8G8_UINT -    0, // R32G32_UINT -    0, // R16G16B16X16_FLOAT -    0, // R32_UINT -    0, // R32_SINT -    2, // ASTC_2D_8X8_UNORM -    2, // ASTC_2D_8X5_UNORM -    2, // ASTC_2D_5X4_UNORM -    0, // B8G8R8A8_SRGB -    2, // BC1_RGBA_SRGB -    2, // BC2_SRGB -    2, // BC3_SRGB -    2, // BC7_SRGB -    0, // A4B4G4R4_UNORM -    2, // ASTC_2D_4X4_SRGB -    2, // ASTC_2D_8X8_SRGB -    2, // ASTC_2D_8X5_SRGB -    2, // ASTC_2D_5X4_SRGB -    2, // ASTC_2D_5X5_UNORM -    2, // ASTC_2D_5X5_SRGB -    2, // 
ASTC_2D_10X8_UNORM -    2, // ASTC_2D_10X8_SRGB -    2, // ASTC_2D_6X6_UNORM -    2, // ASTC_2D_6X6_SRGB -    2, // ASTC_2D_10X10_UNORM -    2, // ASTC_2D_10X10_SRGB -    2, // ASTC_2D_12X12_UNORM -    2, // ASTC_2D_12X12_SRGB -    2, // ASTC_2D_8X6_UNORM -    2, // ASTC_2D_8X6_SRGB -    2, // ASTC_2D_6X5_UNORM -    2, // ASTC_2D_6X5_SRGB -    0, // E5B9G9R9_FLOAT -    0, // D32_FLOAT -    0, // D16_UNORM -    0, // D24_UNORM_S8_UINT -    0, // S8_UINT_D24_UNORM -    0, // D32_FLOAT_S8_UINT -}}; - -/** - * Gets the compression factor for the specified PixelFormat. This applies to just the - * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed - * texture formats. - */ -inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { -    DEBUG_ASSERT(format != PixelFormat::Invalid); -    DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size()); -    return compression_factor_shift_table[static_cast<std::size_t>(format)]; -} - -inline constexpr u32 GetCompressionFactor(PixelFormat format) { -    return 1U << GetCompressionFactorShift(format); -} - -constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ +constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{      1,  // A8B8G8R8_UNORM      1,  // A8B8G8R8_SNORM      1,  // A8B8G8R8_SINT @@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{      1,  // D32_FLOAT_S8_UINT  }}; -static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { -    if (format == PixelFormat::Invalid) -        return 0; - -    ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); -    return block_width_table[static_cast<std::size_t>(format)]; +constexpr u32 DefaultBlockWidth(PixelFormat format) { +    ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size()); +    return 
BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)];  } -constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ +constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{      1,  // A8B8G8R8_UNORM      1,  // A8B8G8R8_SNORM      1,  // A8B8G8R8_SINT @@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{      1,  // D32_FLOAT_S8_UINT  }}; -static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { -    if (format == PixelFormat::Invalid) -        return 0; - -    ASSERT(static_cast<std::size_t>(format) < block_height_table.size()); -    return block_height_table[static_cast<std::size_t>(format)]; +constexpr u32 DefaultBlockHeight(PixelFormat format) { +    ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size()); +    return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)];  } -constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ +constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{      32,  // A8B8G8R8_UNORM      32,  // A8B8G8R8_SNORM      32,  // A8B8G8R8_SINT @@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{      64,  // D32_FLOAT_S8_UINT  }}; -static constexpr u32 GetFormatBpp(PixelFormat format) { -    if (format == PixelFormat::Invalid) -        return 0; - -    ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); -    return bpp_table[static_cast<std::size_t>(format)]; +constexpr u32 BitsPerBlock(PixelFormat format) { +    ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size()); +    return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)];  }  /// Returns the sizer in bytes of the specified pixel format -static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { -    if (pixel_format == PixelFormat::Invalid) { -        return 0; -    } -    return GetFormatBpp(pixel_format) / CHAR_BIT; +constexpr u32 BytesPerBlock(PixelFormat pixel_format) { +    return BitsPerBlock(pixel_format) / CHAR_BIT;  }  
SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bit> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon::Accelerated { + +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using VideoCore::Surface::BytesPerBlock; + +BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, +                                                          const ImageInfo& info) { +    const Extent3D block = swizzle.block; +    const Extent3D num_tiles = swizzle.num_tiles; +    const u32 bytes_per_block = BytesPerBlock(info.format); +    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); +    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; +    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); +    return BlockLinearSwizzle2DParams{ +        .origin{0, 0, 0}, +        .destination{0, 0, 0}, +        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), +        .layer_stride = info.layer_stride, +        .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), +        .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, +    
    .block_height = block.height, +        .block_height_mask = (1U << block.height) - 1, +    }; +} + +BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, +                                                          const ImageInfo& info) { +    const Extent3D block = swizzle.block; +    const Extent3D num_tiles = swizzle.num_tiles; +    const u32 bytes_per_block = BytesPerBlock(info.format); +    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); +    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + +    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; +    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); +    const u32 slice_size = +        Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; +    return BlockLinearSwizzle3DParams{ +        .origin{0, 0, 0}, +        .destination{0, 0, 0}, +        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), +        .slice_size = slice_size, +        .block_size = block_size, +        .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, +        .block_height = block.height, +        .block_height_mask = (1U << block.height) - 1, +        .block_depth = block.depth, +        .block_depth_mask = (1U << block.depth) - 1, +    }; +} + +} // namespace VideoCommon::Accelerated
\ No newline at end of file diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon::Accelerated { + +struct BlockLinearSwizzle2DParams { +    std::array<u32, 3> origin; +    std::array<s32, 3> destination; +    u32 bytes_per_block_log2; +    u32 layer_stride; +    u32 block_size; +    u32 x_shift; +    u32 block_height; +    u32 block_height_mask; +}; + +struct BlockLinearSwizzle3DParams { +    std::array<u32, 3> origin; +    std::array<s32, 3> destination; +    u32 bytes_per_block_log2; +    u32 slice_size; +    u32 block_size; +    u32 x_shift; +    u32 block_height; +    u32 block_height_mask; +    u32 block_depth; +    u32 block_depth_mask; +}; + +[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( +    const SwizzleParameters& swizzle, const ImageInfo& info); + +[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( +    const SwizzleParameters& swizzle, const ImageInfo& info); + +} // namespace VideoCommon::Accelerated diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 5b475fe06..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCommon { - -struct CopyParams { -    constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_, -                         u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_, -                         u32 depth_) -        : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_}, -          dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_}, -          dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {} - -    constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_) -        : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_}, -          dest_level{level_}, width{width_}, height{height_}, depth{depth_} {} - -    u32 source_x; -    u32 source_y; -    u32 source_z; -    u32 dest_x; -    u32 dest_y; -    u32 dest_z; -    u32 source_level; -    u32 dest_level; -    u32 width; -    u32 height; -    u32 depth; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <array> +#include <span> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { +    const u32 code_offset = 16 + 3 * (4 * y + x); +    const u32 code = (bits >> code_offset) & 7; +    const u32 red0 = (bits >> 0) & 0xff; +    const u32 red1 = (bits >> 8) & 0xff; +    if (red0 > red1) { +        switch (code) { +        case 0: +            return red0; +        case 1: +            return red1; +        case 2: +            return (6 * red0 + 1 * red1) / 7; +        case 3: +            return (5 * red0 + 2 * red1) / 7; +        case 4: +            return (4 * red0 + 3 * red1) / 7; +        case 5: +            return (3 * red0 + 4 * red1) / 7; +        case 6: +            return (2 * red0 + 5 * red1) / 7; +        case 7: +            return (1 * red0 + 6 * red1) / 7; +        } +    } else { +        switch (code) { +        case 0: +            return red0; +        case 1: +            return red1; +        case 2: +            return (4 * red0 + 1 * red1) / 5; +        case 3: +            return (3 * red0 + 2 * red1) / 5; +        case 4: +            return (2 * red0 + 3 * red1) / 5; +        case 5: +            return (1 * red0 + 4 * red1) / 5; +        case 6: +            return 0; +        case 7: +            return 0xff; +        } +    } +    return 0; +} + +void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { +    UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); +    UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); +    static constexpr u32 BLOCK_SIZE = 4; +    size_t input_offset = 0; +    for (u32 slice = 0; slice < 
extent.depth; ++slice) { +        for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { +            for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { +                u64 bits; +                std::memcpy(&bits, &input[input_offset], sizeof(bits)); +                input_offset += sizeof(bits); + +                for (u32 y = 0; y < BLOCK_SIZE; ++y) { +                    for (u32 x = 0; x < BLOCK_SIZE; ++x) { +                        const u32 linear_z = slice; +                        const u32 linear_y = block_y * BLOCK_SIZE + y; +                        const u32 linear_x = block_x * BLOCK_SIZE + x; +                        const u32 offset_z = linear_z * extent.width * extent.height; +                        const u32 offset_y = linear_y * extent.width; +                        const u32 offset_x = linear_x; +                        const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; +                        const u32 color = DecompressBlock(bits, x, y); +                        output[output_offset + 0] = static_cast<u8>(color); +                        output[output_offset + 1] = 0; +                        output[output_offset + 2] = 0; +                        output[output_offset + 3] = 0xff; +                    } +                } +            } +        } +    } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h new file mode 100644 index 000000000..63fb23508 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.h @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <span> + +#include "common/common_types.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h new file mode 100644 index 000000000..3a03b786f --- /dev/null +++ b/src/video_core/texture_cache/descriptor_table.h @@ -0,0 +1,82 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <vector> + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/logging/log.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template <typename Descriptor> +class DescriptorTable { +public: +    explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} + +    [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) { +        [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { +            return false; +        } +        Refresh(gpu_addr, limit); +        return true; +    } + +    void Invalidate() noexcept { +        std::ranges::fill(read_descriptors, 0); +    } + +    [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) { +        DEBUG_ASSERT(index <= current_limit); +        const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); +        std::pair<Descriptor, bool> result; +        gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); +        if (IsDescriptorRead(index)) { +            result.second = result.first != descriptors[index]; +        } else { +            MarkDescriptorAsRead(index); +            result.second = true; +        } +        if (result.second) { +            
descriptors[index] = result.first; +        } +        return result; +    } + +    [[nodiscard]] u32 Limit() const noexcept { +        return current_limit; +    } + +private: +    void Refresh(GPUVAddr gpu_addr, u32 limit) { +        current_gpu_addr = gpu_addr; +        current_limit = limit; + +        const size_t num_descriptors = static_cast<size_t>(limit) + 1; +        read_descriptors.clear(); +        read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); +        descriptors.resize(num_descriptors); +    } + +    void MarkDescriptorAsRead(u32 index) noexcept { +        read_descriptors[index / 64] |= 1ULL << (index % 64); +    } + +    [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { +        return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; +    } + +    Tegra::MemoryManager& gpu_memory; +    GPUVAddr current_gpu_addr{}; +    u32 current_limit{}; +    std::vector<u64> read_descriptors; +    std::vector<Descriptor> descriptors; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7938d71eb..ddfb726fe 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -2,7 +2,6 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
-#include <array>  #include "common/common_types.h"  #include "common/logging/log.h"  #include "video_core/texture_cache/format_lookup_table.h" @@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;  constexpr auto SINT = ComponentType::SINT;  constexpr auto UINT = ComponentType::UINT;  constexpr auto FLOAT = ComponentType::FLOAT; -constexpr bool C = false; // Normal color -constexpr bool S = true;  // Srgb - -struct Table { -    constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, -                    ComponentType green_component_, ComponentType blue_component_, -                    ComponentType alpha_component_, PixelFormat pixel_format_) -        : texture_format{texture_format_}, pixel_format{pixel_format_}, -          red_component{red_component_}, green_component{green_component_}, -          blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} - -    TextureFormat texture_format; -    PixelFormat pixel_format; -    ComponentType red_component; -    ComponentType green_component; -    ComponentType blue_component; -    ComponentType alpha_component; -    bool is_srgb; -}; -constexpr std::array<Table, 86> DefinitionTable = {{ -    {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, -    {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, -    {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, -    {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, -    {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - -    {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, - -    {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, -    {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - -    
{TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - -    {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - -    {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, -    {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, -    {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, -    {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - -    {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, -    {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, -    {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, -    {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - -    {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, -    {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, -    {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, -    {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, -    {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - -    {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, -    {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, -    {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, -    {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, -    {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, - -    {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, -    {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, -    {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, -    
{TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, -    {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - -    {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - -    {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, -    {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, -    {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - -    {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - -    {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, -    {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, -    {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - -    {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, -    {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, -    {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - -    {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - -    {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, -    {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, -    {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, -    {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, -    {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - -    {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, -    {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - -    {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, -    {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, 
PixelFormat::BC2_SRGB}, - -    {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, -    {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - -    {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, -    {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - -    {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, -    {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - -    {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, -    {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - -    {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, -    {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - -    {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, -    {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - -    {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, -    {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - -    {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, -    {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - -    {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, -    {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - -    {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, -    {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - -    {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, -    {TextureFormat::ASTC_2D_10X8, S, 
UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - -    {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, -    {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - -    {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, -    {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - -    {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, -    {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - -    {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, -    {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, +constexpr bool LINEAR = false; +constexpr bool SRGB = true; + +constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, +                   ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { +    u32 hash = is_srgb ? 
1 : 0; +    hash |= static_cast<u32>(red_component) << 1; +    hash |= static_cast<u32>(green_component) << 4; +    hash |= static_cast<u32>(blue_component) << 7; +    hash |= static_cast<u32>(alpha_component) << 10; +    hash |= static_cast<u32>(format) << 13; +    return hash; +} -    {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, -    {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, -}}; +constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { +    return Hash(format, component, component, component, component, is_srgb); +}  } // Anonymous namespace -FormatLookupTable::FormatLookupTable() { -    table.fill(static_cast<u8>(PixelFormat::Invalid)); - -    for (const auto& entry : DefinitionTable) { -        table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, -                             entry.green_component, entry.blue_component, entry.alpha_component)] = -            static_cast<u8>(entry.pixel_format); -    } -} - -PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, -                                              ComponentType red_component, -                                              ComponentType green_component, -                                              ComponentType blue_component, -                                              ComponentType alpha_component) const noexcept { -    const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( -        format, is_srgb, red_component, green_component, blue_component, alpha_component)]); -    // [[likely]] -    if (pixel_format != PixelFormat::Invalid) { -        return pixel_format; +PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, +                                       ComponentType blue, ComponentType alpha, +                                       bool 
is_srgb) noexcept { +    switch (Hash(format, red, green, blue, alpha, is_srgb)) { +    case Hash(TextureFormat::A8R8G8B8, UNORM): +        return PixelFormat::A8B8G8R8_UNORM; +    case Hash(TextureFormat::A8R8G8B8, SNORM): +        return PixelFormat::A8B8G8R8_SNORM; +    case Hash(TextureFormat::A8R8G8B8, UINT): +        return PixelFormat::A8B8G8R8_UINT; +    case Hash(TextureFormat::A8R8G8B8, SINT): +        return PixelFormat::A8B8G8R8_SINT; +    case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): +        return PixelFormat::A8B8G8R8_SRGB; +    case Hash(TextureFormat::B5G6R5, UNORM): +        return PixelFormat::B5G6R5_UNORM; +    case Hash(TextureFormat::A2B10G10R10, UNORM): +        return PixelFormat::A2B10G10R10_UNORM; +    case Hash(TextureFormat::A2B10G10R10, UINT): +        return PixelFormat::A2B10G10R10_UINT; +    case Hash(TextureFormat::A1B5G5R5, UNORM): +        return PixelFormat::A1B5G5R5_UNORM; +    case Hash(TextureFormat::A4B4G4R4, UNORM): +        return PixelFormat::A4B4G4R4_UNORM; +    case Hash(TextureFormat::R8, UNORM): +        return PixelFormat::R8_UNORM; +    case Hash(TextureFormat::R8, SNORM): +        return PixelFormat::R8_SNORM; +    case Hash(TextureFormat::R8, UINT): +        return PixelFormat::R8_UINT; +    case Hash(TextureFormat::R8, SINT): +        return PixelFormat::R8_SINT; +    case Hash(TextureFormat::R8G8, UNORM): +        return PixelFormat::R8G8_UNORM; +    case Hash(TextureFormat::R8G8, SNORM): +        return PixelFormat::R8G8_SNORM; +    case Hash(TextureFormat::R8G8, UINT): +        return PixelFormat::R8G8_UINT; +    case Hash(TextureFormat::R8G8, SINT): +        return PixelFormat::R8G8_SINT; +    case Hash(TextureFormat::R16G16B16A16, FLOAT): +        return PixelFormat::R16G16B16A16_FLOAT; +    case Hash(TextureFormat::R16G16B16A16, UNORM): +        return PixelFormat::R16G16B16A16_UNORM; +    case Hash(TextureFormat::R16G16B16A16, SNORM): +        return PixelFormat::R16G16B16A16_SNORM; +    case 
Hash(TextureFormat::R16G16B16A16, UINT): +        return PixelFormat::R16G16B16A16_UINT; +    case Hash(TextureFormat::R16G16B16A16, SINT): +        return PixelFormat::R16G16B16A16_SINT; +    case Hash(TextureFormat::R16G16, FLOAT): +        return PixelFormat::R16G16_FLOAT; +    case Hash(TextureFormat::R16G16, UNORM): +        return PixelFormat::R16G16_UNORM; +    case Hash(TextureFormat::R16G16, SNORM): +        return PixelFormat::R16G16_SNORM; +    case Hash(TextureFormat::R16G16, UINT): +        return PixelFormat::R16G16_UINT; +    case Hash(TextureFormat::R16G16, SINT): +        return PixelFormat::R16G16_SINT; +    case Hash(TextureFormat::R16, FLOAT): +        return PixelFormat::R16_FLOAT; +    case Hash(TextureFormat::R16, UNORM): +        return PixelFormat::R16_UNORM; +    case Hash(TextureFormat::R16, SNORM): +        return PixelFormat::R16_SNORM; +    case Hash(TextureFormat::R16, UINT): +        return PixelFormat::R16_UINT; +    case Hash(TextureFormat::R16, SINT): +        return PixelFormat::R16_SINT; +    case Hash(TextureFormat::B10G11R11, FLOAT): +        return PixelFormat::B10G11R11_FLOAT; +    case Hash(TextureFormat::R32G32B32A32, FLOAT): +        return PixelFormat::R32G32B32A32_FLOAT; +    case Hash(TextureFormat::R32G32B32A32, UINT): +        return PixelFormat::R32G32B32A32_UINT; +    case Hash(TextureFormat::R32G32B32A32, SINT): +        return PixelFormat::R32G32B32A32_SINT; +    case Hash(TextureFormat::R32G32B32, FLOAT): +        return PixelFormat::R32G32B32_FLOAT; +    case Hash(TextureFormat::R32G32, FLOAT): +        return PixelFormat::R32G32_FLOAT; +    case Hash(TextureFormat::R32G32, UINT): +        return PixelFormat::R32G32_UINT; +    case Hash(TextureFormat::R32G32, SINT): +        return PixelFormat::R32G32_SINT; +    case Hash(TextureFormat::R32, FLOAT): +        return PixelFormat::R32_FLOAT; +    case Hash(TextureFormat::R32, UINT): +        return PixelFormat::R32_UINT; +    case Hash(TextureFormat::R32, SINT): + 
       return PixelFormat::R32_SINT; +    case Hash(TextureFormat::E5B9G9R9, FLOAT): +        return PixelFormat::E5B9G9R9_FLOAT; +    case Hash(TextureFormat::D32, FLOAT): +        return PixelFormat::D32_FLOAT; +    case Hash(TextureFormat::D16, UNORM): +        return PixelFormat::D16_UNORM; +    case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): +        return PixelFormat::S8_UINT_D24_UNORM; +    case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): +        return PixelFormat::S8_UINT_D24_UNORM; +    case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): +        return PixelFormat::D32_FLOAT_S8_UINT; +    case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): +        return PixelFormat::BC1_RGBA_UNORM; +    case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): +        return PixelFormat::BC1_RGBA_SRGB; +    case Hash(TextureFormat::BC2, UNORM, LINEAR): +        return PixelFormat::BC2_UNORM; +    case Hash(TextureFormat::BC2, UNORM, SRGB): +        return PixelFormat::BC2_SRGB; +    case Hash(TextureFormat::BC3, UNORM, LINEAR): +        return PixelFormat::BC3_UNORM; +    case Hash(TextureFormat::BC3, UNORM, SRGB): +        return PixelFormat::BC3_SRGB; +    case Hash(TextureFormat::BC4, UNORM): +        return PixelFormat::BC4_UNORM; +    case Hash(TextureFormat::BC4, SNORM): +        return PixelFormat::BC4_SNORM; +    case Hash(TextureFormat::BC5, UNORM): +        return PixelFormat::BC5_UNORM; +    case Hash(TextureFormat::BC5, SNORM): +        return PixelFormat::BC5_SNORM; +    case Hash(TextureFormat::BC7, UNORM, LINEAR): +        return PixelFormat::BC7_UNORM; +    case Hash(TextureFormat::BC7, UNORM, SRGB): +        return PixelFormat::BC7_SRGB; +    case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): +        return PixelFormat::BC6H_SFLOAT; +    case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): +        return PixelFormat::BC6H_UFLOAT; +    case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): +        return 
PixelFormat::ASTC_2D_4X4_UNORM; +    case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): +        return PixelFormat::ASTC_2D_4X4_SRGB; +    case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_5X4_UNORM; +    case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): +        return PixelFormat::ASTC_2D_5X4_SRGB; +    case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_5X5_UNORM; +    case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): +        return PixelFormat::ASTC_2D_5X5_SRGB; +    case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_8X8_UNORM; +    case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): +        return PixelFormat::ASTC_2D_8X8_SRGB; +    case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_8X5_UNORM; +    case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): +        return PixelFormat::ASTC_2D_8X5_SRGB; +    case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_10X8_UNORM; +    case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): +        return PixelFormat::ASTC_2D_10X8_SRGB; +    case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_6X6_UNORM; +    case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): +        return PixelFormat::ASTC_2D_6X6_SRGB; +    case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_10X10_UNORM; +    case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): +        return PixelFormat::ASTC_2D_10X10_SRGB; +    case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_12X12_UNORM; +    case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): +        return PixelFormat::ASTC_2D_12X12_SRGB; +    case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_8X6_UNORM; +    case Hash(TextureFormat::ASTC_2D_8X6, 
UNORM, SRGB): +        return PixelFormat::ASTC_2D_8X6_SRGB; +    case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): +        return PixelFormat::ASTC_2D_6X5_UNORM; +    case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): +        return PixelFormat::ASTC_2D_6X5_SRGB;      }      UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", -                      static_cast<int>(format), is_srgb, static_cast<int>(red_component), -                      static_cast<int>(green_component), static_cast<int>(blue_component), -                      static_cast<int>(alpha_component)); +                      static_cast<int>(format), is_srgb, static_cast<int>(red), +                      static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));      return PixelFormat::A8B8G8R8_UNORM;  } -void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, -                            ComponentType green_component, ComponentType blue_component, -                            ComponentType alpha_component, PixelFormat pixel_format) {} - -std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, -                                              ComponentType red_component, -                                              ComponentType green_component, -                                              ComponentType blue_component, -                                              ComponentType alpha_component) noexcept { -    const auto format_index = static_cast<std::size_t>(format); -    const auto red_index = static_cast<std::size_t>(red_component); -    const auto green_index = static_cast<std::size_t>(green_component); -    const auto blue_index = static_cast<std::size_t>(blue_component); -    const auto alpha_index = static_cast<std::size_t>(alpha_component); -    const std::size_t srgb_index = is_srgb ? 
1 : 0; - -    return format_index * PerFormat + -           srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + -           alpha_index * PerComponent * PerComponent * PerComponent + -           blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; -} -  } // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h @@ -4,48 +4,14 @@  #pragma once -#include <array> -#include <limits>  #include "video_core/surface.h"  #include "video_core/textures/texture.h"  namespace VideoCommon { -class FormatLookupTable { -public: -    explicit FormatLookupTable(); - -    VideoCore::Surface::PixelFormat GetPixelFormat( -        Tegra::Texture::TextureFormat format, bool is_srgb, -        Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, -        Tegra::Texture::ComponentType blue_component, -        Tegra::Texture::ComponentType alpha_component) const noexcept; - -private: -    static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max()); - -    static constexpr std::size_t NumTextureFormats = 128; - -    static constexpr std::size_t PerComponent = 8; -    static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; -    static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; -    static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; -    static constexpr std::size_t PerFormat = PerComponents4 * 2; - -    static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb, -                                      Tegra::Texture::ComponentType red_component, -                                      Tegra::Texture::ComponentType green_component, -                            
          Tegra::Texture::ComponentType blue_component, -                                      Tegra::Texture::ComponentType alpha_component) noexcept; - -    void Set(Tegra::Texture::TextureFormat format, bool is_srgb, -             Tegra::Texture::ComponentType red_component, -             Tegra::Texture::ComponentType green_component, -             Tegra::Texture::ComponentType blue_component, -             Tegra::Texture::ComponentType alpha_component, -             VideoCore::Surface::PixelFormat pixel_format); - -    std::array<u8, NumTextureFormats * PerFormat> table; -}; +VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( +    Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, +    Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, +    Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;  } // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp @@ -0,0 +1,95 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <string> + +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/render_targets.h" + +namespace VideoCommon { + +std::string Name(const ImageBase& image) { +    const GPUVAddr gpu_addr = image.gpu_addr; +    const ImageInfo& info = image.info; +    const u32 width = info.size.width; +    const u32 height = info.size.height; +    const u32 depth = info.size.depth; +    const u32 num_layers = image.info.resources.layers; +    const u32 num_levels = image.info.resources.levels; +    std::string resource; +    if (num_layers > 1) { +        resource += fmt::format(":L{}", num_layers); +    } +    if (num_levels > 1) { +        resource += fmt::format(":M{}", num_levels); +    } +    switch (image.info.type) { +    case ImageType::e1D: +        return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource); +    case ImageType::e2D: +        return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource); +    case ImageType::e3D: +        return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource); +    case ImageType::Linear: +        return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height); +    case ImageType::Buffer: +        return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width); +    } +    return "Invalid"; +} + +std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { +    const u32 width = image_view.size.width; +    const u32 height = image_view.size.height; +    const u32 depth = image_view.size.depth; +    const u32 num_levels = image_view.range.extent.levels; +    const u32 num_layers = image_view.range.extent.layers; + +    const std::string level = num_levels > 1 ? 
fmt::format(":{}", num_levels) : ""; +    switch (type.value_or(image_view.type)) { +    case ImageViewType::e1D: +        return fmt::format("ImageView 1D {}{}", width, level); +    case ImageViewType::e2D: +        return fmt::format("ImageView 2D {}x{}{}", width, height, level); +    case ImageViewType::Cube: +        return fmt::format("ImageView Cube {}x{}{}", width, height, level); +    case ImageViewType::e3D: +        return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); +    case ImageViewType::e1DArray: +        return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); +    case ImageViewType::e2DArray: +        return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); +    case ImageViewType::CubeArray: +        return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); +    case ImageViewType::Rect: +        return fmt::format("ImageView Rect {}x{}{}", width, height, level); +    case ImageViewType::Buffer: +        return fmt::format("BufferView {}", width); +    } +    return "Invalid"; +} + +std::string Name(const RenderTargets& render_targets) { +    std::string_view debug_prefix; +    const auto num_color = std::ranges::count_if( +        render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); }); +    if (render_targets.depth_buffer_id) { +        debug_prefix = num_color > 0 ? "R" : "Z"; +    } else { +        debug_prefix = num_color > 0 ? 
"C" : "X"; +    } +    const Extent2D size = render_targets.size; +    if (num_color > 0) { +        return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width, +                           size.height); +    } else { +        return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height); +    } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h new file mode 100644 index 000000000..a48413983 --- /dev/null +++ b/src/video_core/texture_cache/formatter.h @@ -0,0 +1,263 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include <fmt/format.h> + +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +template <> +struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> { +    template <typename FormatContext> +    auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) { +        using VideoCore::Surface::PixelFormat; +        const string_view name = [format] { +            switch (format) { +            case PixelFormat::A8B8G8R8_UNORM: +                return "A8B8G8R8_UNORM"; +            case PixelFormat::A8B8G8R8_SNORM: +                return "A8B8G8R8_SNORM"; +            case PixelFormat::A8B8G8R8_SINT: +                return "A8B8G8R8_SINT"; +            case PixelFormat::A8B8G8R8_UINT: +                return "A8B8G8R8_UINT"; +            case PixelFormat::R5G6B5_UNORM: +                return "R5G6B5_UNORM"; +            case PixelFormat::B5G6R5_UNORM: +                return "B5G6R5_UNORM"; +            case PixelFormat::A1R5G5B5_UNORM: +                return "A1R5G5B5_UNORM"; +            case PixelFormat::A2B10G10R10_UNORM: +                return "A2B10G10R10_UNORM"; +            case PixelFormat::A2B10G10R10_UINT: +                return 
"A2B10G10R10_UINT"; +            case PixelFormat::A1B5G5R5_UNORM: +                return "A1B5G5R5_UNORM"; +            case PixelFormat::R8_UNORM: +                return "R8_UNORM"; +            case PixelFormat::R8_SNORM: +                return "R8_SNORM"; +            case PixelFormat::R8_SINT: +                return "R8_SINT"; +            case PixelFormat::R8_UINT: +                return "R8_UINT"; +            case PixelFormat::R16G16B16A16_FLOAT: +                return "R16G16B16A16_FLOAT"; +            case PixelFormat::R16G16B16A16_UNORM: +                return "R16G16B16A16_UNORM"; +            case PixelFormat::R16G16B16A16_SNORM: +                return "R16G16B16A16_SNORM"; +            case PixelFormat::R16G16B16A16_SINT: +                return "R16G16B16A16_SINT"; +            case PixelFormat::R16G16B16A16_UINT: +                return "R16G16B16A16_UINT"; +            case PixelFormat::B10G11R11_FLOAT: +                return "B10G11R11_FLOAT"; +            case PixelFormat::R32G32B32A32_UINT: +                return "R32G32B32A32_UINT"; +            case PixelFormat::BC1_RGBA_UNORM: +                return "BC1_RGBA_UNORM"; +            case PixelFormat::BC2_UNORM: +                return "BC2_UNORM"; +            case PixelFormat::BC3_UNORM: +                return "BC3_UNORM"; +            case PixelFormat::BC4_UNORM: +                return "BC4_UNORM"; +            case PixelFormat::BC4_SNORM: +                return "BC4_SNORM"; +            case PixelFormat::BC5_UNORM: +                return "BC5_UNORM"; +            case PixelFormat::BC5_SNORM: +                return "BC5_SNORM"; +            case PixelFormat::BC7_UNORM: +                return "BC7_UNORM"; +            case PixelFormat::BC6H_UFLOAT: +                return "BC6H_UFLOAT"; +            case PixelFormat::BC6H_SFLOAT: +                return "BC6H_SFLOAT"; +            case PixelFormat::ASTC_2D_4X4_UNORM: +                return "ASTC_2D_4X4_UNORM"; +            
case PixelFormat::B8G8R8A8_UNORM: +                return "B8G8R8A8_UNORM"; +            case PixelFormat::R32G32B32A32_FLOAT: +                return "R32G32B32A32_FLOAT"; +            case PixelFormat::R32G32B32A32_SINT: +                return "R32G32B32A32_SINT"; +            case PixelFormat::R32G32_FLOAT: +                return "R32G32_FLOAT"; +            case PixelFormat::R32G32_SINT: +                return "R32G32_SINT"; +            case PixelFormat::R32_FLOAT: +                return "R32_FLOAT"; +            case PixelFormat::R16_FLOAT: +                return "R16_FLOAT"; +            case PixelFormat::R16_UNORM: +                return "R16_UNORM"; +            case PixelFormat::R16_SNORM: +                return "R16_SNORM"; +            case PixelFormat::R16_UINT: +                return "R16_UINT"; +            case PixelFormat::R16_SINT: +                return "R16_SINT"; +            case PixelFormat::R16G16_UNORM: +                return "R16G16_UNORM"; +            case PixelFormat::R16G16_FLOAT: +                return "R16G16_FLOAT"; +            case PixelFormat::R16G16_UINT: +                return "R16G16_UINT"; +            case PixelFormat::R16G16_SINT: +                return "R16G16_SINT"; +            case PixelFormat::R16G16_SNORM: +                return "R16G16_SNORM"; +            case PixelFormat::R32G32B32_FLOAT: +                return "R32G32B32_FLOAT"; +            case PixelFormat::A8B8G8R8_SRGB: +                return "A8B8G8R8_SRGB"; +            case PixelFormat::R8G8_UNORM: +                return "R8G8_UNORM"; +            case PixelFormat::R8G8_SNORM: +                return "R8G8_SNORM"; +            case PixelFormat::R8G8_SINT: +                return "R8G8_SINT"; +            case PixelFormat::R8G8_UINT: +                return "R8G8_UINT"; +            case PixelFormat::R32G32_UINT: +                return "R32G32_UINT"; +            case PixelFormat::R16G16B16X16_FLOAT: +                return 
"R16G16B16X16_FLOAT"; +            case PixelFormat::R32_UINT: +                return "R32_UINT"; +            case PixelFormat::R32_SINT: +                return "R32_SINT"; +            case PixelFormat::ASTC_2D_8X8_UNORM: +                return "ASTC_2D_8X8_UNORM"; +            case PixelFormat::ASTC_2D_8X5_UNORM: +                return "ASTC_2D_8X5_UNORM"; +            case PixelFormat::ASTC_2D_5X4_UNORM: +                return "ASTC_2D_5X4_UNORM"; +            case PixelFormat::B8G8R8A8_SRGB: +                return "B8G8R8A8_SRGB"; +            case PixelFormat::BC1_RGBA_SRGB: +                return "BC1_RGBA_SRGB"; +            case PixelFormat::BC2_SRGB: +                return "BC2_SRGB"; +            case PixelFormat::BC3_SRGB: +                return "BC3_SRGB"; +            case PixelFormat::BC7_SRGB: +                return "BC7_SRGB"; +            case PixelFormat::A4B4G4R4_UNORM: +                return "A4B4G4R4_UNORM"; +            case PixelFormat::ASTC_2D_4X4_SRGB: +                return "ASTC_2D_4X4_SRGB"; +            case PixelFormat::ASTC_2D_8X8_SRGB: +                return "ASTC_2D_8X8_SRGB"; +            case PixelFormat::ASTC_2D_8X5_SRGB: +                return "ASTC_2D_8X5_SRGB"; +            case PixelFormat::ASTC_2D_5X4_SRGB: +                return "ASTC_2D_5X4_SRGB"; +            case PixelFormat::ASTC_2D_5X5_UNORM: +                return "ASTC_2D_5X5_UNORM"; +            case PixelFormat::ASTC_2D_5X5_SRGB: +                return "ASTC_2D_5X5_SRGB"; +            case PixelFormat::ASTC_2D_10X8_UNORM: +                return "ASTC_2D_10X8_UNORM"; +            case PixelFormat::ASTC_2D_10X8_SRGB: +                return "ASTC_2D_10X8_SRGB"; +            case PixelFormat::ASTC_2D_6X6_UNORM: +                return "ASTC_2D_6X6_UNORM"; +            case PixelFormat::ASTC_2D_6X6_SRGB: +                return "ASTC_2D_6X6_SRGB"; +            case PixelFormat::ASTC_2D_10X10_UNORM: +                return "ASTC_2D_10X10_UNORM"; +     
       case PixelFormat::ASTC_2D_10X10_SRGB: +                return "ASTC_2D_10X10_SRGB"; +            case PixelFormat::ASTC_2D_12X12_UNORM: +                return "ASTC_2D_12X12_UNORM"; +            case PixelFormat::ASTC_2D_12X12_SRGB: +                return "ASTC_2D_12X12_SRGB"; +            case PixelFormat::ASTC_2D_8X6_UNORM: +                return "ASTC_2D_8X6_UNORM"; +            case PixelFormat::ASTC_2D_8X6_SRGB: +                return "ASTC_2D_8X6_SRGB"; +            case PixelFormat::ASTC_2D_6X5_UNORM: +                return "ASTC_2D_6X5_UNORM"; +            case PixelFormat::ASTC_2D_6X5_SRGB: +                return "ASTC_2D_6X5_SRGB"; +            case PixelFormat::E5B9G9R9_FLOAT: +                return "E5B9G9R9_FLOAT"; +            case PixelFormat::D32_FLOAT: +                return "D32_FLOAT"; +            case PixelFormat::D16_UNORM: +                return "D16_UNORM"; +            case PixelFormat::D24_UNORM_S8_UINT: +                return "D24_UNORM_S8_UINT"; +            case PixelFormat::S8_UINT_D24_UNORM: +                return "S8_UINT_D24_UNORM"; +            case PixelFormat::D32_FLOAT_S8_UINT: +                return "D32_FLOAT_S8_UINT"; +            case PixelFormat::MaxDepthStencilFormat: +            case PixelFormat::Invalid: +                return "Invalid"; +            } +            return "Invalid"; +        }(); +        return formatter<string_view>::format(name, ctx); +    } +}; + +template <> +struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> { +    template <typename FormatContext> +    auto format(VideoCommon::ImageType type, FormatContext& ctx) { +        const string_view name = [type] { +            using VideoCommon::ImageType; +            switch (type) { +            case ImageType::e1D: +                return "1D"; +            case ImageType::e2D: +                return "2D"; +            case ImageType::e3D: +                return "3D"; +            case 
ImageType::Linear: +                return "Linear"; +            case ImageType::Buffer: +                return "Buffer"; +            } +            return "Invalid"; +        }(); +        return formatter<string_view>::format(name, ctx); +    } +}; + +template <> +struct fmt::formatter<VideoCommon::Extent3D> { +    constexpr auto parse(fmt::format_parse_context& ctx) { +        return ctx.begin(); +    } + +    template <typename FormatContext> +    auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { +        return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, +                              extent.depth); +    } +}; + +namespace VideoCommon { + +struct ImageBase; +struct ImageViewBase; +struct RenderTargets; + +[[nodiscard]] std::string Name(const ImageBase& image); + +[[nodiscard]] std::string Name(const ImageViewBase& image_view, +                               std::optional<ImageViewType> type = std::nullopt); + +[[nodiscard]] std::string Name(const RenderTargets& render_targets); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..448a05fcc --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp @@ -0,0 +1,216 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <optional> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/util.h" + +namespace VideoCommon { + +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; + +namespace { +/// Returns the base layer and mip level offset +[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) { +    if (layer_stride == 0) { +        return {0, diff}; +    } else { +        return {diff / layer_stride, diff % layer_stride}; +    } +} + +[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { +    return layers.base_level < info.resources.levels && +           layers.base_layer + layers.num_layers <= info.resources.layers; +} + +[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { +    const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); +    const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); +    if (!ValidateLayers(copy.src_subresource, src)) { +        return false; +    } +    if (!ValidateLayers(copy.dst_subresource, dst)) { +        return false; +    } +    if (copy.src_offset.x + copy.extent.width > src_size.width || +        copy.src_offset.y + copy.extent.height > src_size.height || +        copy.src_offset.z + copy.extent.depth > src_size.depth) { +        return false; +    } +    if (copy.dst_offset.x + copy.extent.width > dst_size.width || +        copy.dst_offset.y + copy.extent.height > dst_size.height || +        copy.dst_offset.z + copy.extent.depth > dst_size.depth) { +        return false; +    } +    return true; +} +} // Anonymous namespace + +ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr 
gpu_addr_, VAddr cpu_addr_) +    : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, +      unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, +      converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, +      cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, +      mip_level_offsets{CalculateMipLevelOffsets(info)} { +    if (info.type == ImageType::e3D) { +        slice_offsets = CalculateSliceOffsets(info); +        slice_subresources = CalculateSliceSubresources(info); +    } +} + +std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { +    if (other_addr < gpu_addr) { +        // Subresource address can't be lower than the base +        return std::nullopt; +    } +    const u32 diff = static_cast<u32>(other_addr - gpu_addr); +    if (diff > guest_size_bytes) { +        // This can happen when two CPU addresses are used for different GPU addresses +        return std::nullopt; +    } +    if (info.type != ImageType::e3D) { +        const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); +        const auto end = mip_level_offsets.begin() + info.resources.levels; +        const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); +        if (layer > info.resources.layers || it == end) { +            return std::nullopt; +        } +        return SubresourceBase{ +            .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)), +            .layer = layer, +        }; +    } else { +        // TODO: Consider using binary_search after a threshold +        const auto it = std::ranges::find(slice_offsets, diff); +        if (it == slice_offsets.cend()) { +            return std::nullopt; +        } +        return slice_subresources[std::distance(slice_offsets.begin(), it)]; +    } +} + +ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { +    const auto it = 
std::ranges::find(image_view_infos, view_info); +    if (it == image_view_infos.end()) { +        return ImageViewId{}; +    } +    return image_view_ids[std::distance(image_view_infos.begin(), it)]; +} + +void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { +    image_view_infos.push_back(view_info); +    image_view_ids.push_back(image_view_id); +} + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { +    static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; +    ASSERT(lhs.info.type == rhs.info.type); +    std::optional<SubresourceBase> base; +    if (lhs.info.type == ImageType::Linear) { +        base = SubresourceBase{.level = 0, .layer = 0}; +    } else { +        base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); +    } +    if (!base) { +        LOG_ERROR(HW_GPU, "Image alias should have been flipped"); +        return; +    } +    const PixelFormat lhs_format = lhs.info.format; +    const PixelFormat rhs_format = rhs.info.format; +    const Extent2D lhs_block{ +        .width = DefaultBlockWidth(lhs_format), +        .height = DefaultBlockHeight(lhs_format), +    }; +    const Extent2D rhs_block{ +        .width = DefaultBlockWidth(rhs_format), +        .height = DefaultBlockHeight(rhs_format), +    }; +    const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; +    const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; +    if (is_lhs_compressed && is_rhs_compressed) { +        LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); +        return; +    } +    const s32 lhs_mips = lhs.info.resources.levels; +    const s32 rhs_mips = rhs.info.resources.levels; +    const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); +    AliasedImage lhs_alias; +    AliasedImage rhs_alias; +    lhs_alias.id = rhs_id; +    rhs_alias.id = lhs_id; +    lhs_alias.copies.reserve(num_mips); +    
rhs_alias.copies.reserve(num_mips); +    for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { +        Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); +        Extent3D rhs_size = MipSize(rhs.info.size, mip_level); +        if (is_lhs_compressed) { +            lhs_size.width /= lhs_block.width; +            lhs_size.height /= lhs_block.height; +        } +        if (is_rhs_compressed) { +            rhs_size.width /= rhs_block.width; +            rhs_size.height /= rhs_block.height; +        } +        const Extent3D copy_size{ +            .width = std::min(lhs_size.width, rhs_size.width), +            .height = std::min(lhs_size.height, rhs_size.height), +            .depth = std::min(lhs_size.depth, rhs_size.depth), +        }; +        if (copy_size.width == 0 || copy_size.height == 0) { +            LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); +            continue; +        } +        const bool is_lhs_3d = lhs.info.type == ImageType::e3D; +        const bool is_rhs_3d = rhs.info.type == ImageType::e3D; +        const Offset3D lhs_offset{0, 0, 0}; +        const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; +        const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; +        const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; +        const s32 num_layers = std::min(lhs_layers, rhs_layers); +        const SubresourceLayers lhs_subresource{ +            .base_level = mip_level, +            .base_layer = 0, +            .num_layers = num_layers, +        }; +        const SubresourceLayers rhs_subresource{ +            .base_level = base->level + mip_level, +            .base_layer = is_rhs_3d ? 
0 : base->layer, +            .num_layers = num_layers, +        }; +        [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ +            .src_subresource = lhs_subresource, +            .dst_subresource = rhs_subresource, +            .src_offset = lhs_offset, +            .dst_offset = rhs_offset, +            .extent = copy_size, +        }); +        [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ +            .src_subresource = rhs_subresource, +            .dst_subresource = lhs_subresource, +            .src_offset = rhs_offset, +            .dst_offset = lhs_offset, +            .extent = copy_size, +        }); +        ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); +        ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); +    } +    ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); +    if (lhs_alias.copies.empty()) { +        return; +    } +    lhs.aliased_images.push_back(std::move(lhs_alias)); +    rhs.aliased_images.push_back(std::move(rhs_alias)); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h @@ -0,0 +1,83 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <optional> +#include <vector> + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +enum class ImageFlagBits : u32 { +    AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU +    Converted = 1 << 1,   ///< Guest format is not supported natively and it has to be converted +    CpuModified = 1 << 2, ///< Contents have been modified from the CPU +    GpuModified = 1 << 3, ///< Contents have been modified from the GPU +    Tracked = 1 << 4,     ///< Writes and reads are being hooked from the CPU JIT +    Strong = 1 << 5,      ///< Exists in the image table, the dimensions are can be trusted +    Registered = 1 << 6,  ///< True when the image is registered +    Picked = 1 << 7,      ///< Temporary flag to mark the image as picked +}; +DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) + +struct ImageViewInfo; + +struct AliasedImage { +    std::vector<ImageCopy> copies; +    ImageId id; +}; + +struct ImageBase { +    explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + +    [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; + +    [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; + +    void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); + +    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { +        const VAddr overlap_end = overlap_cpu_addr + overlap_size; +        return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; +    } + +    ImageInfo info; + +    u32 guest_size_bytes = 0; +    u32 unswizzled_size_bytes = 0; +    u32 converted_size_bytes = 0; +    ImageFlagBits flags = ImageFlagBits::CpuModified; + +    GPUVAddr gpu_addr = 0; +    VAddr 
cpu_addr = 0; +    VAddr cpu_addr_end = 0; + +    u64 modification_tick = 0; +    u64 frame_tick = 0; + +    std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; + +    std::vector<ImageViewInfo> image_view_infos; +    std::vector<ImageViewId> image_view_ids; + +    std::vector<u32> slice_offsets; +    std::vector<SubresourceBase> slice_subresources; + +    std::vector<AliasedImage> aliased_images; +}; + +struct ImageAllocBase { +    std::vector<ImageId> images; +}; + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp @@ -0,0 +1,189 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +ImageInfo::ImageInfo(const TICEntry& config) noexcept { +    format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, +                                        config.a_type, config.srgb_conversion); +    num_samples = NumSamples(config.msaa_mode); +    resources.levels = config.max_mip_level + 1; +    if (config.IsPitchLinear()) { +        pitch = config.Pitch(); +    } else if (config.IsBlockLinear()) { +        block = Extent3D{ +            .width = config.block_width, +            .height = 
config.block_height, +            .depth = config.block_depth, +        }; +    } +    tile_width_spacing = config.tile_width_spacing; +    if (config.texture_type != TextureType::Texture2D && +        config.texture_type != TextureType::Texture2DNoMipmap) { +        ASSERT(!config.IsPitchLinear()); +    } +    switch (config.texture_type) { +    case TextureType::Texture1D: +        ASSERT(config.BaseLayer() == 0); +        type = ImageType::e1D; +        size.width = config.Width(); +        break; +    case TextureType::Texture1DArray: +        UNIMPLEMENTED_IF(config.BaseLayer() != 0); +        type = ImageType::e1D; +        size.width = config.Width(); +        resources.layers = config.Depth(); +        break; +    case TextureType::Texture2D: +    case TextureType::Texture2DNoMipmap: +        ASSERT(config.Depth() == 1); +        type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; +        size.width = config.Width(); +        size.height = config.Height(); +        resources.layers = config.BaseLayer() + 1; +        break; +    case TextureType::Texture2DArray: +        type = ImageType::e2D; +        size.width = config.Width(); +        size.height = config.Height(); +        resources.layers = config.BaseLayer() + config.Depth(); +        break; +    case TextureType::TextureCubemap: +        ASSERT(config.Depth() == 1); +        type = ImageType::e2D; +        size.width = config.Width(); +        size.height = config.Height(); +        resources.layers = config.BaseLayer() + 6; +        break; +    case TextureType::TextureCubeArray: +        UNIMPLEMENTED_IF(config.load_store_hint != 0); +        type = ImageType::e2D; +        size.width = config.Width(); +        size.height = config.Height(); +        resources.layers = config.BaseLayer() + config.Depth() * 6; +        break; +    case TextureType::Texture3D: +        ASSERT(config.BaseLayer() == 0); +        type = ImageType::e3D; +        size.width = config.Width(); +        
size.height = config.Height(); +        size.depth = config.Depth(); +        break; +    case TextureType::Texture1DBuffer: +        type = ImageType::Buffer; +        size.width = config.Width(); +        break; +    default: +        UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); +        break; +    } +    if (type != ImageType::Linear) { +        // FIXME: Call this without passing *this +        layer_stride = CalculateLayerStride(*this); +        maybe_unaligned_layer_stride = CalculateLayerSize(*this); +    } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { +    const auto& rt = regs.rt[index]; +    format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); +    if (rt.tile_mode.is_pitch_linear) { +        ASSERT(rt.tile_mode.is_3d == 0); +        type = ImageType::Linear; +        pitch = rt.width; +        size = Extent3D{ +            .width = pitch / BytesPerBlock(format), +            .height = rt.height, +            .depth = 1, +        }; +        return; +    } +    size.width = rt.width; +    size.height = rt.height; +    layer_stride = rt.layer_stride * 4; +    maybe_unaligned_layer_stride = layer_stride; +    num_samples = NumSamples(regs.multisample_mode); +    block = Extent3D{ +        .width = rt.tile_mode.block_width, +        .height = rt.tile_mode.block_height, +        .depth = rt.tile_mode.block_depth, +    }; +    if (rt.tile_mode.is_3d) { +        type = ImageType::e3D; +        size.depth = rt.depth; +    } else { +        type = ImageType::e2D; +        resources.layers = rt.depth; +    } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { +    format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); +    size.width = regs.zeta_width; +    size.height = regs.zeta_height; +    resources.levels = 1; +    layer_stride = regs.zeta.layer_stride * 4; +    maybe_unaligned_layer_stride 
= layer_stride; +    num_samples = NumSamples(regs.multisample_mode); +    block = Extent3D{ +        .width = regs.zeta.tile_mode.block_width, +        .height = regs.zeta.tile_mode.block_height, +        .depth = regs.zeta.tile_mode.block_depth, +    }; +    if (regs.zeta.tile_mode.is_pitch_linear) { +        ASSERT(regs.zeta.tile_mode.is_3d == 0); +        type = ImageType::Linear; +        pitch = size.width * BytesPerBlock(format); +    } else if (regs.zeta.tile_mode.is_3d) { +        ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0); +        type = ImageType::e3D; +        size.depth = regs.zeta_depth; +    } else { +        type = ImageType::e2D; +        resources.layers = regs.zeta_depth; +    } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { +    UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); +    format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); +    if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { +        type = ImageType::Linear; +        size = Extent3D{ +            .width = config.pitch / VideoCore::Surface::BytesPerBlock(format), +            .height = config.height, +            .depth = 1, +        }; +        pitch = config.pitch; +    } else { +        type = config.block_depth > 0 ? 
ImageType::e3D : ImageType::e2D; +        block = Extent3D{ +            .width = config.block_width, +            .height = config.block_height, +            .depth = config.block_depth, +        }; +        // 3D blits with more than once slice are not implemented for now +        // Render to individual slices +        size = Extent3D{ +            .width = config.width, +            .height = config.height, +            .depth = 1, +        }; +    } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h @@ -0,0 +1,38 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +struct ImageInfo { +    explicit ImageInfo() = default; +    explicit ImageInfo(const TICEntry& config) noexcept; +    explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; +    explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; +    explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; + +    PixelFormat format = PixelFormat::Invalid; +    ImageType type = ImageType::e1D; +    SubresourceExtent resources; +    Extent3D size{1, 1, 1}; +    union { +        Extent3D block{0, 0, 0}; +        u32 pitch; +    }; +    u32 layer_stride = 0; +    u32 maybe_unaligned_layer_stride = 0; +    u32 num_samples = 1; +    u32 tile_width_spacing = 0; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.cpp 
b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..076a4bcfd --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> + +#include "common/assert.h" +#include "core/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, +                             ImageId image_id_) +    : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, +      size{ +          .width = std::max(image_info.size.width >> range.base.level, 1u), +          .height = std::max(image_info.size.height >> range.base.level, 1u), +          .depth = std::max(image_info.size.depth >> range.base.level, 1u), +      } { +    ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), +               "Image view format {} is incompatible with image format {}", info.format, +               image_info.format); +    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); +    if (image_info.type == ImageType::Linear && is_async) { +        flags |= ImageViewFlagBits::PreemtiveDownload; +    } +    if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { +        flags |= ImageViewFlagBits::Slice; +    } +} + +ImageViewBase::ImageViewBase(const NullImageParams&) {} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file 
mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_funcs.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +using VideoCore::Surface::PixelFormat; + +struct ImageViewInfo; +struct ImageInfo; + +struct NullImageParams {}; + +enum class ImageViewFlagBits : u16 { +    PreemtiveDownload = 1 << 0, +    Strong = 1 << 1, +    Slice = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) + +struct ImageViewBase { +    explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, +                           ImageId image_id); +    explicit ImageViewBase(const NullImageParams&); + +    [[nodiscard]] bool IsBuffer() const noexcept { +        return type == ImageViewType::Buffer; +    } + +    ImageId image_id{}; +    PixelFormat format{}; +    ImageViewType type{}; +    SubresourceRange range; +    Extent3D size{0, 0, 0}; +    ImageViewFlagBits flags{}; + +    u64 invalidation_tick = 0; +    u64 modification_tick = 0; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -0,0 +1,88 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <limits> + +#include "common/assert.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +namespace { + +constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max(); + +[[nodiscard]] u8 CastSwizzle(SwizzleSource source) { +    const u8 casted = static_cast<u8>(source); +    ASSERT(static_cast<SwizzleSource>(casted) == source); +    return casted; +} + +} // Anonymous namespace + +ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept +    : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, +      y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, +      w_source{CastSwizzle(config.w_source)} { +    range.base = SubresourceBase{ +        .level = static_cast<s32>(config.res_min_mip_level), +        .layer = base_layer, +    }; +    range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; + +    switch (config.texture_type) { +    case TextureType::Texture1D: +        ASSERT(config.Height() == 1); +        ASSERT(config.Depth() == 1); +        type = ImageViewType::e1D; +        break; +    case TextureType::Texture2D: +    case TextureType::Texture2DNoMipmap: +        ASSERT(config.Depth() == 1); +        type = config.normalized_coords ? 
ImageViewType::e2D : ImageViewType::Rect; +        break; +    case TextureType::Texture3D: +        type = ImageViewType::e3D; +        break; +    case TextureType::TextureCubemap: +        ASSERT(config.Depth() == 1); +        type = ImageViewType::Cube; +        range.extent.layers = 6; +        break; +    case TextureType::Texture1DArray: +        type = ImageViewType::e1DArray; +        range.extent.layers = config.Depth(); +        break; +    case TextureType::Texture2DArray: +        type = ImageViewType::e2DArray; +        range.extent.layers = config.Depth(); +        break; +    case TextureType::Texture1DBuffer: +        type = ImageViewType::Buffer; +        break; +    case TextureType::TextureCubeArray: +        type = ImageViewType::CubeArray; +        range.extent.layers = config.Depth() * 6; +        break; +    default: +        UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); +        break; +    } +} + +ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, +                             SubresourceRange range_) noexcept +    : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, +      y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, +      w_source{RENDER_TARGET_SWIZZLE} {} + +bool ImageViewInfo::IsRenderTarget() const noexcept { +    return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && +           z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <type_traits> + +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +/// Properties used to determine a image view +struct ImageViewInfo { +    explicit ImageViewInfo() noexcept = default; +    explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept; +    explicit ImageViewInfo(ImageViewType type, PixelFormat format, +                           SubresourceRange range = {}) noexcept; + +    auto operator<=>(const ImageViewInfo&) const noexcept = default; + +    [[nodiscard]] bool IsRenderTarget() const noexcept; + +    [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept { +        return std::array{ +            static_cast<SwizzleSource>(x_source), +            static_cast<SwizzleSource>(y_source), +            static_cast<SwizzleSource>(z_source), +            static_cast<SwizzleSource>(w_source), +        }; +    } + +    ImageViewType type{}; +    PixelFormat format{}; +    SubresourceRange range; +    u8 x_source = static_cast<u8>(SwizzleSource::R); +    u8 y_source = static_cast<u8>(SwizzleSource::G); +    u8 z_source = static_cast<u8>(SwizzleSource::B); +    u8 w_source = static_cast<u8>(SwizzleSource::A); +}; +static_assert(std::has_unique_object_representations_v<ImageViewInfo>); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h new file mode 100644 index 000000000..9b9544b07 --- /dev/null +++ b/src/video_core/texture_cache/render_targets.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <algorithm> +#include <span> +#include <utility> + +#include "common/bit_cast.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +/// Framebuffer properties used to lookup a framebuffer +struct RenderTargets { +    constexpr auto operator<=>(const RenderTargets&) const noexcept = default; + +    constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept { +        const auto contains = [elements](ImageViewId item) { +            return std::ranges::find(elements, item) != elements.end(); +        }; +        return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); +    } + +    std::array<ImageViewId, NUM_RT> color_buffer_ids; +    ImageViewId depth_buffer_id; +    std::array<u8, NUM_RT> draw_buffers{}; +    Extent2D size; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::RenderTargets> { +    size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { +        using VideoCommon::ImageViewId; +        size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id); +        for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { +            value ^= std::hash<ImageViewId>{}(color_buffer_id); +        } +        value ^= Common::BitCast<u64>(rt.draw_buffers); +        value ^= Common::BitCast<u64>(rt.size); +        return value; +    } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h @@ -0,0 +1,55 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <utility> + +#include "common/assert.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) { +    switch (num_samples) { +    case 1: +        return {0, 0}; +    case 2: +        return {1, 0}; +    case 4: +        return {1, 1}; +    case 8: +        return {2, 1}; +    case 16: +        return {2, 2}; +    } +    UNREACHABLE_MSG("Invalid number of samples={}", num_samples); +    return {1, 1}; +} + +[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { +    using Tegra::Texture::MsaaMode; +    switch (msaa_mode) { +    case MsaaMode::Msaa1x1: +        return 1; +    case MsaaMode::Msaa2x1: +    case MsaaMode::Msaa2x1_D3D: +        return 2; +    case MsaaMode::Msaa2x2: +    case MsaaMode::Msaa2x2_VC4: +    case MsaaMode::Msaa2x2_VC12: +        return 4; +    case MsaaMode::Msaa4x2: +    case MsaaMode::Msaa4x2_D3D: +    case MsaaMode::Msaa4x2_VC8: +    case MsaaMode::Msaa4x2_VC24: +        return 8; +    case MsaaMode::Msaa4x4: +        return 16; +    } +    UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode)); +    return 1; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h new file mode 100644 index 000000000..eae3be6ea --- /dev/null +++ b/src/video_core/texture_cache/slot_vector.h @@ -0,0 +1,156 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <concepts> +#include <numeric> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/assert.h" +#include "common/common_types.h" + +namespace VideoCommon { + +struct SlotId { +    static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max(); + +    constexpr auto operator<=>(const SlotId&) const noexcept = default; + +    constexpr explicit operator bool() const noexcept { +        return index != INVALID_INDEX; +    } + +    u32 index = INVALID_INDEX; +}; + +template <class T> +requires std::is_nothrow_move_assignable_v<T>&& +    std::is_nothrow_move_constructible_v<T> class SlotVector { +public: +    ~SlotVector() noexcept { +        size_t index = 0; +        for (u64 bits : stored_bitset) { +            for (size_t bit = 0; bits; ++bit, bits >>= 1) { +                if ((bits & 1) != 0) { +                    values[index + bit].object.~T(); +                } +            } +            index += 64; +        } +        delete[] values; +    } + +    [[nodiscard]] T& operator[](SlotId id) noexcept { +        ValidateIndex(id); +        return values[id.index].object; +    } + +    [[nodiscard]] const T& operator[](SlotId id) const noexcept { +        ValidateIndex(id); +        return values[id.index].object; +    } + +    template <typename... Args> +    [[nodiscard]] SlotId insert(Args&&... 
args) noexcept { +        const u32 index = FreeValueIndex(); +        new (&values[index].object) T(std::forward<Args>(args)...); +        SetStorageBit(index); + +        return SlotId{index}; +    } + +    void erase(SlotId id) noexcept { +        values[id.index].object.~T(); +        free_list.push_back(id.index); +        ResetStorageBit(id.index); +    } + +private: +    struct NonTrivialDummy { +        NonTrivialDummy() noexcept {} +    }; + +    union Entry { +        Entry() noexcept : dummy{} {} +        ~Entry() noexcept {} + +        NonTrivialDummy dummy; +        T object; +    }; + +    void SetStorageBit(u32 index) noexcept { +        stored_bitset[index / 64] |= u64(1) << (index % 64); +    } + +    void ResetStorageBit(u32 index) noexcept { +        stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); +    } + +    bool ReadStorageBit(u32 index) noexcept { +        return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; +    } + +    void ValidateIndex(SlotId id) const noexcept { +        DEBUG_ASSERT(id); +        DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); +        DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); +    } + +    [[nodiscard]] u32 FreeValueIndex() noexcept { +        if (free_list.empty()) { +            Reserve(values_capacity ? 
(values_capacity << 1) : 1); +        } +        const u32 free_index = free_list.back(); +        free_list.pop_back(); +        return free_index; +    } + +    void Reserve(size_t new_capacity) noexcept { +        Entry* const new_values = new Entry[new_capacity]; +        size_t index = 0; +        for (u64 bits : stored_bitset) { +            for (size_t bit = 0; bits; ++bit, bits >>= 1) { +                const size_t i = index + bit; +                if ((bits & 1) == 0) { +                    continue; +                } +                T& old_value = values[i].object; +                new (&new_values[i].object) T(std::move(old_value)); +                old_value.~T(); +            } +            index += 64; +        } + +        stored_bitset.resize((new_capacity + 63) / 64); + +        const size_t old_free_size = free_list.size(); +        free_list.resize(old_free_size + (new_capacity - values_capacity)); +        std::iota(free_list.begin() + old_free_size, free_list.end(), +                  static_cast<u32>(values_capacity)); + +        delete[] values; +        values = new_values; +        values_capacity = new_capacity; +    } + +    Entry* values = nullptr; +    size_t values_capacity = 0; +    size_t values_size = 0; + +    std::vector<u64> stored_bitset; +    std::vector<u32> free_list; +}; + +} // namespace VideoCommon + +template <> +struct std::hash<VideoCommon::SlotId> { +    size_t operator()(const VideoCommon::SlotId& id) const noexcept { +        return std::hash<u32>{}(id.index); +    } +}; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index efbcf6723..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/algorithm.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/convert.h" - -namespace VideoCommon { - -MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); - -using Tegra::Texture::ConvertFromGuestToHost; -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::IsPixelFormatASTC; -using VideoCore::Surface::PixelFormat; - -StagingCache::StagingCache() = default; - -StagingCache::~StagingCache() = default; - -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, -                                 bool is_astc_supported_) -    : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels), -      mipmap_offsets(params.num_levels) { -    is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_; -    host_memory_size = params.GetHostSizeInBytes(is_converted); - -    std::size_t offset = 0; -    for (u32 level = 0; level < params.num_levels; ++level) { -        const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; -        mipmap_sizes[level] = mipmap_size; -        mipmap_offsets[level] = offset; -        offset += mipmap_size; -    } -    layer_size = offset; -    if (params.is_layered) { -        if (params.is_tiled) { -            layer_size = -                SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); -        } -        guest_memory_size = layer_size * params.depth; -    } else { -        guest_memory_size = layer_size; -    } -} - -MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { -    const u32 src_bpp{params.GetBytesPerPixel()}; -    const u32 
dst_bpp{rhs.GetBytesPerPixel()}; -    const bool ib1 = params.IsBuffer(); -    const bool ib2 = rhs.IsBuffer(); -    if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { -        const bool cb1 = params.IsCompressed(); -        const bool cb2 = rhs.IsCompressed(); -        if (cb1 == cb2) { -            return MatchTopologyResult::FullMatch; -        } -        return MatchTopologyResult::CompressUnmatch; -    } -    return MatchTopologyResult::None; -} - -MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { -    // Buffer surface Check -    if (params.IsBuffer()) { -        const std::size_t wd1 = params.width * params.GetBytesPerPixel(); -        const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); -        if (wd1 == wd2) { -            return MatchStructureResult::FullMatch; -        } -        return MatchStructureResult::None; -    } - -    // Linear Surface check -    if (!params.is_tiled) { -        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { -            if (params.width == rhs.width) { -                return MatchStructureResult::FullMatch; -            } else { -                return MatchStructureResult::SemiMatch; -            } -        } -        return MatchStructureResult::None; -    } - -    // Tiled Surface check -    if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, -                 params.tile_width_spacing, params.num_levels) == -        std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, -                 rhs.tile_width_spacing, rhs.num_levels)) { -        if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { -            return MatchStructureResult::FullMatch; -        } -        const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, -                                                   rhs.pixel_format); -     
   const u32 hs = -            SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); -        const u32 w1 = params.GetBlockAlignedWidth(); -        if (std::tie(w1, params.height) == std::tie(ws, hs)) { -            return MatchStructureResult::SemiMatch; -        } -    } -    return MatchStructureResult::None; -} - -std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( -    const GPUVAddr candidate_gpu_addr) const { -    if (gpu_addr == candidate_gpu_addr) { -        return {{0, 0}}; -    } - -    if (candidate_gpu_addr < gpu_addr) { -        return std::nullopt; -    } - -    const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; -    const auto layer{static_cast<u32>(relative_address / layer_size)}; -    if (layer >= params.depth) { -        return std::nullopt; -    } - -    const GPUVAddr mipmap_address = relative_address - layer_size * layer; -    const auto mipmap_it = -        Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); -    if (mipmap_it == mipmap_offsets.end()) { -        return std::nullopt; -    } - -    const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; -    return std::make_pair(layer, level); -} - -std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { -    const u32 layers{params.depth}; -    const u32 mipmaps{params.num_levels}; -    std::vector<CopyParams> result; -    result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); - -    for (u32 layer = 0; layer < layers; layer++) { -        for (u32 level = 0; level < mipmaps; level++) { -            const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); -            const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); -            result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); -        } -    } -    
return result; -} - -std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { -    const u32 mipmaps{params.num_levels}; -    std::vector<CopyParams> result; -    result.reserve(mipmaps); - -    for (u32 level = 0; level < mipmaps; level++) { -        const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); -        const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); -        const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; -        result.emplace_back(width, height, depth, level); -    } -    return result; -} - -void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, -                                  const SurfaceParams& surface_params, u8* buffer, u32 level) { -    const u32 width{surface_params.GetMipWidth(level)}; -    const u32 height{surface_params.GetMipHeight(level)}; -    const u32 block_height{surface_params.GetMipBlockHeight(level)}; -    const u32 block_depth{surface_params.GetMipBlockDepth(level)}; - -    std::size_t guest_offset{mipmap_offsets[level]}; -    if (surface_params.is_layered) { -        std::size_t host_offset = 0; -        const std::size_t guest_stride = layer_size; -        const std::size_t host_stride = surface_params.GetHostLayerSize(level); -        for (u32 layer = 0; layer < surface_params.depth; ++layer) { -            MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, -                          block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset, -                          memory + guest_offset); -            guest_offset += guest_stride; -            host_offset += host_stride; -        } -    } else { -        MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth, -                      surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer, -                      memory + 
guest_offset); -    } -} - -void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, -                                 StagingCache& staging_cache) { -    MICROPROFILE_SCOPE(GPU_Load_Texture); -    auto& staging_buffer = staging_cache.GetBuffer(0); -    u8* host_ptr; -    // Use an extra temporal buffer -    auto& tmp_buffer = staging_cache.GetBuffer(1); -    tmp_buffer.resize(guest_memory_size); -    host_ptr = tmp_buffer.data(); -    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - -    if (params.is_tiled) { -        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", -                   params.block_width, static_cast<u32>(params.target)); -        for (u32 level = 0; level < params.num_levels; ++level) { -            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; -            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, -                        staging_buffer.data() + host_offset, level); -        } -    } else { -        ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); -        const u32 bpp{params.GetBytesPerPixel()}; -        const u32 block_width{params.GetDefaultBlockWidth()}; -        const u32 block_height{params.GetDefaultBlockHeight()}; -        const u32 width{(params.width + block_width - 1) / block_width}; -        const u32 height{(params.height + block_height - 1) / block_height}; -        const u32 copy_size{width * bpp}; -        if (params.pitch == copy_size) { -            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); -        } else { -            const u8* start{host_ptr}; -            u8* write_to{staging_buffer.data()}; -            for (u32 h = height; h > 0; --h) { -                std::memcpy(write_to, start, copy_size); -                start += params.pitch; -                write_to += copy_size; -            } -        } -    } - -    if 
(!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { -        return; -    } - -    for (u32 level = params.num_levels; level--;) { -        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; -        const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; -        u8* const in_buffer = staging_buffer.data() + in_host_offset; -        u8* const out_buffer = staging_buffer.data() + out_host_offset; -        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, -                               params.GetMipWidth(level), params.GetMipHeight(level), -                               params.GetMipDepth(level), true, true); -    } -} - -void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, -                                  StagingCache& staging_cache) { -    MICROPROFILE_SCOPE(GPU_Flush_Texture); -    auto& staging_buffer = staging_cache.GetBuffer(0); -    u8* host_ptr; - -    // Use an extra temporal buffer -    auto& tmp_buffer = staging_cache.GetBuffer(1); -    tmp_buffer.resize(guest_memory_size); -    host_ptr = tmp_buffer.data(); - -    if (params.target == SurfaceTarget::Texture3D) { -        // Special case for 3D texture segments -        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -    } - -    if (params.is_tiled) { -        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); -        for (u32 level = 0; level < params.num_levels; ++level) { -            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; -            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, -                        staging_buffer.data() + host_offset, level); -        } -    } else if (params.IsBuffer()) { -        // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest -        // memory. 
-        std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); -    } else { -        ASSERT(params.target == SurfaceTarget::Texture2D); -        ASSERT(params.num_levels == 1); - -        const u32 bpp{params.GetBytesPerPixel()}; -        const u32 copy_size{params.width * bpp}; -        if (params.pitch == copy_size) { -            std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); -        } else { -            u8* start{host_ptr}; -            const u8* read_to{staging_buffer.data()}; -            for (u32 h = params.height; h > 0; --h) { -                std::memcpy(start, read_to, copy_size); -                start += params.pitch; -                read_to += copy_size; -            } -        } -    } -    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index b57135fe4..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <optional> -#include <tuple> -#include <unordered_map> -#include <vector> - -#include "common/common_types.h" -#include "video_core/gpu.h" -#include "video_core/morton.h" -#include "video_core/texture_cache/copy_params.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon { - -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::SurfaceTarget; - -enum class MatchStructureResult : u32 { -    FullMatch = 0, -    SemiMatch = 1, -    None = 2, -}; - -enum class MatchTopologyResult : u32 { -    FullMatch = 0, -    CompressUnmatch = 1, -    None = 2, -}; - -class StagingCache { -public: -    explicit StagingCache(); -    ~StagingCache(); - -    std::vector<u8>& GetBuffer(std::size_t index) { -        return staging_buffer[index]; -    } - -    const std::vector<u8>& GetBuffer(std::size_t index) const { -        return staging_buffer[index]; -    } - -    void SetSize(std::size_t size) { -        staging_buffer.resize(size); -    } - -private: -    std::vector<std::vector<u8>> staging_buffer; -}; - -class SurfaceBaseImpl { -public: -    void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - -    void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - -    GPUVAddr GetGpuAddr() const { -        return gpu_addr; -    } - -    bool Overlaps(const VAddr start, const VAddr end) const { -        return (cpu_addr < end) && (cpu_addr_end > start); -    } - -    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { -        const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; -        return gpu_addr <= other_start && other_end <= gpu_addr_end; -    } - -    // Use only when recycling a surface -    void SetGpuAddr(const GPUVAddr new_addr) { -        gpu_addr = new_addr; -    } - -    VAddr GetCpuAddr() const { -        return 
cpu_addr; -    } - -    VAddr GetCpuAddrEnd() const { -        return cpu_addr_end; -    } - -    void SetCpuAddr(const VAddr new_addr) { -        cpu_addr = new_addr; -        cpu_addr_end = new_addr + guest_memory_size; -    } - -    const SurfaceParams& GetSurfaceParams() const { -        return params; -    } - -    std::size_t GetSizeInBytes() const { -        return guest_memory_size; -    } - -    std::size_t GetHostSizeInBytes() const { -        return host_memory_size; -    } - -    std::size_t GetMipmapSize(const u32 level) const { -        return mipmap_sizes[level]; -    } - -    bool IsLinear() const { -        return !params.is_tiled; -    } - -    bool IsConverted() const { -        return is_converted; -    } - -    bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { -        return params.pixel_format == pixel_format; -    } - -    VideoCore::Surface::PixelFormat GetFormat() const { -        return params.pixel_format; -    } - -    bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { -        return params.target == target; -    } - -    MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; - -    MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; - -    bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { -        return std::tie(gpu_addr, params.target, params.num_levels) == -                   std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && -               params.target == SurfaceTarget::Texture2D && params.num_levels == 1; -    } - -    std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; - -    std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { -        return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); -    } - -protected: -    explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, -                             bool is_astc_supported_); -    ~SurfaceBaseImpl() = default; - -    virtual void DecorateSurfaceName() = 0; - -    const SurfaceParams params; -    std::size_t layer_size; -    std::size_t guest_memory_size; -    std::size_t host_memory_size; -    GPUVAddr gpu_addr{}; -    VAddr cpu_addr{}; -    VAddr cpu_addr_end{}; -    bool is_converted{}; - -    std::vector<std::size_t> mipmap_sizes; -    std::vector<std::size_t> mipmap_offsets; - -private: -    void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params, -                     u8* buffer, u32 level); - -    std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; - -    std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; -}; - -template <typename TView> -class SurfaceBase : public SurfaceBaseImpl { -public: -    virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0; - -    virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; - -    void MarkAsModified(bool is_modified_, u64 tick) { -        is_modified = is_modified_ || is_target; -        modification_tick = tick; -    } - -    void MarkAsRenderTarget(bool is_target_, u32 index_) { -        is_target = is_target_; -        index = index_; -    } - -    void SetMemoryMarked(bool is_memory_marked_) { -        is_memory_marked = is_memory_marked_; -    } - -    bool IsMemoryMarked() const { -        return is_memory_marked; -    } - -    void SetSyncPending(bool is_sync_pending_) { -        is_sync_pending = is_sync_pending_; -    } - -    bool IsSyncPending() const { -        return is_sync_pending; -    } - -    void MarkAsPicked(bool is_picked_) { -        is_picked = is_picked_; -    } - -    bool IsModified() const { -        return is_modified; -    } - -   
 bool IsProtected() const { -        // Only 3D slices are to be protected -        return is_target && params.target == SurfaceTarget::Texture3D; -    } - -    bool IsRenderTarget() const { -        return is_target; -    } - -    u32 GetRenderTarget() const { -        return index; -    } - -    bool IsRegistered() const { -        return is_registered; -    } - -    bool IsPicked() const { -        return is_picked; -    } - -    void MarkAsRegistered(bool is_reg) { -        is_registered = is_reg; -    } - -    u64 GetModificationTick() const { -        return modification_tick; -    } - -    TView EmplaceOverview(const SurfaceParams& overview_params) { -        const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; -        return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); -    } - -    TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { -        return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, -                                  base_level, num_levels)); -    } - -    std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, -                                              const GPUVAddr view_addr, -                                              const std::size_t candidate_size, const u32 mipmap, -                                              const u32 layer) { -        const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; -        if (!layer_mipmap) { -            return {}; -        } -        const auto [end_layer, end_mipmap] = *layer_mipmap; -        if (layer != end_layer) { -            if (mipmap == 0 && end_mipmap == 0) { -                return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1)); -            } -            return {}; -        } else { -            return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap)); -        } -    
} - -    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, -                                     const std::size_t candidate_size) { -        if (params.target == SurfaceTarget::Texture3D || -            view_params.target == SurfaceTarget::Texture3D || -            (params.num_levels == 1 && !params.is_layered)) { -            return {}; -        } -        const auto layer_mipmap{GetLayerMipmap(view_addr)}; -        if (!layer_mipmap) { -            return {}; -        } -        const auto [layer, mipmap] = *layer_mipmap; -        if (GetMipmapSize(mipmap) != candidate_size) { -            return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); -        } -        return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); -    } - -    TView GetMainView() const { -        return main_view; -    } - -protected: -    explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_, -                         bool is_astc_supported_) -        : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {} - -    ~SurfaceBase() = default; - -    virtual TView CreateView(const ViewParams& view_key) = 0; - -    TView main_view; -    std::unordered_map<ViewParams, TView> views; - -private: -    TView GetView(const ViewParams& key) { -        const auto [entry, is_cache_miss] = views.try_emplace(key); -        auto& view{entry->second}; -        if (is_cache_miss) { -            view = CreateView(key); -        } -        return view; -    } - -    static constexpr u32 NO_RT = 0xFFFFFFFF; - -    bool is_modified{}; -    bool is_target{}; -    bool is_registered{}; -    bool is_picked{}; -    bool is_memory_marked{}; -    bool is_sync_pending{}; -    u32 index{NO_RT}; -    u64 modification_tick{}; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index 
96f93246d..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null @@ -1,445 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <string> -#include <tuple> - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "core/core.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceTargetFromTextureType; -using VideoCore::Surface::SurfaceType; - -namespace { - -SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { -    switch (type) { -    case Tegra::Shader::TextureType::Texture1D: -        return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; -    case Tegra::Shader::TextureType::Texture2D: -        return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; -    case Tegra::Shader::TextureType::Texture3D: -        ASSERT(!is_array); -        return SurfaceTarget::Texture3D; -    case Tegra::Shader::TextureType::TextureCube: -        return is_array ? 
SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; -    default: -        UNREACHABLE(); -        return SurfaceTarget::Texture2D; -    } -} - -SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { -    switch (type) { -    case Tegra::Shader::ImageType::Texture1D: -        return SurfaceTarget::Texture1D; -    case Tegra::Shader::ImageType::TextureBuffer: -        return SurfaceTarget::TextureBuffer; -    case Tegra::Shader::ImageType::Texture1DArray: -        return SurfaceTarget::Texture1DArray; -    case Tegra::Shader::ImageType::Texture2D: -        return SurfaceTarget::Texture2D; -    case Tegra::Shader::ImageType::Texture2DArray: -        return SurfaceTarget::Texture2DArray; -    case Tegra::Shader::ImageType::Texture3D: -        return SurfaceTarget::Texture3D; -    default: -        UNREACHABLE(); -        return SurfaceTarget::Texture2D; -    } -} - -constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { -    return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); -} - -} // Anonymous namespace - -SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, -                                              const Tegra::Texture::TICEntry& tic, -                                              const VideoCommon::Shader::Sampler& entry) { -    SurfaceParams params; -    params.is_tiled = tic.IsTiled(); -    params.srgb_conversion = tic.IsSrgbConversionEnabled(); -    params.block_width = params.is_tiled ? tic.BlockWidth() : 0; -    params.block_height = params.is_tiled ? tic.BlockHeight() : 0; -    params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; -    params.tile_width_spacing = params.is_tiled ? 
(1 << tic.tile_width_spacing.Value()) : 1; -    params.pixel_format = lookup_table.GetPixelFormat( -        tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); -    params.type = GetFormatType(params.pixel_format); -    if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { -        switch (params.pixel_format) { -        case PixelFormat::R16_UNORM: -        case PixelFormat::R16_FLOAT: -            params.pixel_format = PixelFormat::D16_UNORM; -            break; -        case PixelFormat::R32_FLOAT: -            params.pixel_format = PixelFormat::D32_FLOAT; -            break; -        default: -            UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", -                              static_cast<u32>(params.pixel_format)); -        } -        params.type = GetFormatType(params.pixel_format); -    } -    // TODO: on 1DBuffer we should use the tic info. -    if (tic.IsBuffer()) { -        params.target = SurfaceTarget::TextureBuffer; -        params.width = tic.Width(); -        params.pitch = params.width * params.GetBytesPerPixel(); -        params.height = 1; -        params.depth = 1; -        params.num_levels = 1; -        params.emulated_levels = 1; -        params.is_layered = false; -    } else { -        params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); -        params.width = tic.Width(); -        params.height = tic.Height(); -        params.depth = tic.Depth(); -        params.pitch = params.is_tiled ? 
0 : tic.Pitch(); -        if (params.target == SurfaceTarget::TextureCubemap || -            params.target == SurfaceTarget::TextureCubeArray) { -            params.depth *= 6; -        } -        params.num_levels = tic.max_mip_level + 1; -        params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); -        params.is_layered = params.IsLayered(); -    } -    return params; -} - -SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, -                                            const Tegra::Texture::TICEntry& tic, -                                            const VideoCommon::Shader::Image& entry) { -    SurfaceParams params; -    params.is_tiled = tic.IsTiled(); -    params.srgb_conversion = tic.IsSrgbConversionEnabled(); -    params.block_width = params.is_tiled ? tic.BlockWidth() : 0; -    params.block_height = params.is_tiled ? tic.BlockHeight() : 0; -    params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; -    params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; -    params.pixel_format = lookup_table.GetPixelFormat( -        tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); -    params.type = GetFormatType(params.pixel_format); -    params.target = ImageTypeToSurfaceTarget(entry.type); -    // TODO: on 1DBuffer we should use the tic info. -    if (tic.IsBuffer()) { -        params.target = SurfaceTarget::TextureBuffer; -        params.width = tic.Width(); -        params.pitch = params.width * params.GetBytesPerPixel(); -        params.height = 1; -        params.depth = 1; -        params.num_levels = 1; -        params.emulated_levels = 1; -        params.is_layered = false; -    } else { -        params.width = tic.Width(); -        params.height = tic.Height(); -        params.depth = tic.Depth(); -        params.pitch = params.is_tiled ? 
0 : tic.Pitch(); -        if (params.target == SurfaceTarget::TextureCubemap || -            params.target == SurfaceTarget::TextureCubeArray) { -            params.depth *= 6; -        } -        params.num_levels = tic.max_mip_level + 1; -        params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); -        params.is_layered = params.IsLayered(); -    } -    return params; -} - -SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { -    const auto& regs = maxwell3d.regs; -    const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); -    const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; -    const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); -    return { -        .is_tiled = regs.zeta.memory_layout.type == -                    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, -        .srgb_conversion = false, -        .is_layered = is_layered, -        .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), -        .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U), -        .block_depth = block_depth, -        .tile_width_spacing = 1, -        .width = regs.zeta_width, -        .height = regs.zeta_height, -        .depth = is_layered ? regs.zeta_layers.Value() : 1U, -        .pitch = 0, -        .num_levels = 1, -        .emulated_levels = 1, -        .pixel_format = pixel_format, -        .type = GetFormatType(pixel_format), -        .target = is_layered ? 
SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, -    }; -} - -SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, -                                                  std::size_t index) { -    const auto& config{maxwell3d.regs.rt[index]}; -    SurfaceParams params; -    params.is_tiled = -        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; -    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || -                             config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; -    params.block_width = config.memory_layout.block_width; -    params.block_height = config.memory_layout.block_height; -    params.block_depth = config.memory_layout.block_depth; -    params.tile_width_spacing = 1; -    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); -    params.type = GetFormatType(params.pixel_format); -    if (params.is_tiled) { -        params.pitch = 0; -        params.width = config.width; -    } else { -        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; -        params.pitch = config.width; -        params.width = params.pitch / bpp; -    } -    params.height = config.height; -    params.num_levels = 1; -    params.emulated_levels = 1; - -    if (config.memory_layout.is_3d != 0) { -        params.depth = config.layers.Value(); -        params.is_layered = false; -        params.target = SurfaceTarget::Texture3D; -    } else if (config.layers > 1) { -        params.depth = config.layers.Value(); -        params.is_layered = true; -        params.target = SurfaceTarget::Texture2DArray; -    } else { -        params.depth = 1; -        params.is_layered = false; -        params.target = SurfaceTarget::Texture2D; -    } -    return params; -} - -SurfaceParams SurfaceParams::CreateForFermiCopySurface( -    const Tegra::Engines::Fermi2D::Regs::Surface& config) { -    const bool is_tiled = 
!config.linear; -    const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); - -    SurfaceParams params{ -        .is_tiled = is_tiled, -        .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || -                           config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, -        .is_layered = false, -        .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, -        .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, -        .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, -        .tile_width_spacing = 1, -        .width = config.width, -        .height = config.height, -        .depth = 1, -        .pitch = config.pitch, -        .num_levels = 1, -        .emulated_levels = 1, -        .pixel_format = pixel_format, -        .type = GetFormatType(pixel_format), -        // TODO(Rodrigo): Try to guess texture arrays from parameters -        .target = SurfaceTarget::Texture2D, -    }; - -    params.is_layered = params.IsLayered(); -    return params; -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( -    const VideoCommon::Shader::Sampler& entry) { -    return TextureTypeToSurfaceTarget(entry.type, entry.is_array); -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( -    const VideoCommon::Shader::Image& entry) { -    return ImageTypeToSurfaceTarget(entry.type); -} - -bool SurfaceParams::IsLayered() const { -    switch (target) { -    case SurfaceTarget::Texture1DArray: -    case SurfaceTarget::Texture2DArray: -    case SurfaceTarget::TextureCubemap: -    case SurfaceTarget::TextureCubeArray: -        return true; -    default: -        return false; -    } -} - -// Auto block resizing algorithm from: -// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c -u32 SurfaceParams::GetMipBlockHeight(u32 level) const { -    if (level == 0) { -        return this->block_height; -    } 
- -    const u32 height_new{GetMipHeight(level)}; -    const u32 default_block_height{GetDefaultBlockHeight()}; -    const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; -    const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); -    return std::clamp(block_height_new, 3U, 7U) - 3U; -} - -u32 SurfaceParams::GetMipBlockDepth(u32 level) const { -    if (level == 0) { -        return this->block_depth; -    } -    if (is_layered) { -        return 0; -    } - -    const u32 depth_new{GetMipDepth(level)}; -    const u32 block_depth_new = Common::Log2Ceil32(depth_new); -    if (block_depth_new > 4) { -        return 5 - (GetMipBlockHeight(level) >= 2); -    } -    return block_depth_new; -} - -std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { -    std::size_t offset = 0; -    for (u32 i = 0; i < level; i++) { -        offset += GetInnerMipmapMemorySize(i, false, false); -    } -    return offset; -} - -std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { -    std::size_t offset = 0; -    if (is_converted) { -        for (u32 i = 0; i < level; ++i) { -            offset += GetConvertedMipmapSize(i) * GetNumLayers(); -        } -    } else { -        for (u32 i = 0; i < level; ++i) { -            offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); -        } -    } -    return offset; -} - -std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { -    constexpr std::size_t rgba8_bpp = 4ULL; -    const std::size_t mip_width = GetMipWidth(level); -    const std::size_t mip_height = GetMipHeight(level); -    const std::size_t mip_depth = is_layered ? 
1 : GetMipDepth(level); -    return mip_width * mip_height * mip_depth * rgba8_bpp; -} - -std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { -    std::size_t size = 0; -    for (u32 level = 0; level < num_levels; ++level) { -        size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); -    } -    if (is_tiled && is_layered) { -        return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); -    } -    return size; -} - -std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, -                                                    bool uncompressed) const { -    const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; -    const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; -    const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)}; -    if (is_tiled) { -        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width, -                                             mip_height, mip_depth, GetMipBlockHeight(level), -                                             GetMipBlockDepth(level)); -    } else if (as_host_size || IsBuffer()) { -        return GetBytesPerPixel() * mip_width * mip_height * mip_depth; -    } else { -        // Linear Texture Case -        return pitch * mip_height * mip_depth; -    } -} - -bool SurfaceParams::operator==(const SurfaceParams& rhs) const { -    return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, -                    height, depth, pitch, num_levels, pixel_format, type, target) == -           std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, -                    rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, -                    rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); -} - -std::string SurfaceParams::TargetName() 
const { -    switch (target) { -    case SurfaceTarget::Texture1D: -        return "1D"; -    case SurfaceTarget::TextureBuffer: -        return "TexBuffer"; -    case SurfaceTarget::Texture2D: -        return "2D"; -    case SurfaceTarget::Texture3D: -        return "3D"; -    case SurfaceTarget::Texture1DArray: -        return "1DArray"; -    case SurfaceTarget::Texture2DArray: -        return "2DArray"; -    case SurfaceTarget::TextureCubemap: -        return "Cube"; -    case SurfaceTarget::TextureCubeArray: -        return "CubeArray"; -    default: -        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); -        UNREACHABLE(); -        return fmt::format("TUK({})", target); -    } -} - -u32 SurfaceParams::GetBlockSize() const { -    const u32 x = 64U << block_width; -    const u32 y = 8U << block_height; -    const u32 z = 1U << block_depth; -    return x * y * z; -} - -std::pair<u32, u32> SurfaceParams::GetBlockXY() const { -    const u32 x_pixels = 64U / GetBytesPerPixel(); -    const u32 x = x_pixels << block_width; -    const u32 y = 8U << block_height; -    return {x, y}; -} - -std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { -    const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; -    const u32 block_size = GetBlockSize(); -    const u32 block_index = offset / block_size; -    const u32 gob_offset = offset % block_size; -    const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); -    const u32 x_gob_pixels = 64U / GetBytesPerPixel(); -    const u32 x_block_pixels = x_gob_pixels << block_width; -    const u32 y_block_pixels = 8U << block_height; -    const u32 z_block_pixels = 1U << block_depth; -    const u32 x_blocks = div_ceil(width, x_block_pixels); -    const u32 y_blocks = div_ceil(height, y_block_pixels); -    const u32 z_blocks = div_ceil(depth, z_block_pixels); -    const u32 base_x = block_index % x_blocks; -    const u32 base_y = (block_index 
/ x_blocks) % y_blocks; -    const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; -    u32 x = base_x * x_block_pixels; -    u32 y = base_y * y_block_pixels; -    u32 z = base_z * z_block_pixels; -    z += gob_index >> block_height; -    y += (gob_index * 8U) % y_block_pixels; -    return {x, y, z}; -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 4466c3c34..000000000 --- a/src/video_core/texture_cache/surface_params.h +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <utility> - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCommon { - -class FormatLookupTable; - -class SurfaceParams { -public: -    /// Creates SurfaceCachedParams from a texture configuration. -    static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, -                                          const Tegra::Texture::TICEntry& tic, -                                          const VideoCommon::Shader::Sampler& entry); - -    /// Creates SurfaceCachedParams from an image configuration. -    static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, -                                        const Tegra::Texture::TICEntry& tic, -                                        const VideoCommon::Shader::Image& entry); - -    /// Creates SurfaceCachedParams for a depth buffer configuration. 
-    static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); - -    /// Creates SurfaceCachedParams from a framebuffer configuration. -    static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, -                                              std::size_t index); - -    /// Creates SurfaceCachedParams from a Fermi2D surface configuration. -    static SurfaceParams CreateForFermiCopySurface( -        const Tegra::Engines::Fermi2D::Regs::Surface& config); - -    /// Obtains the texture target from a shader's sampler entry. -    static VideoCore::Surface::SurfaceTarget ExpectedTarget( -        const VideoCommon::Shader::Sampler& entry); - -    /// Obtains the texture target from a shader's sampler entry. -    static VideoCore::Surface::SurfaceTarget ExpectedTarget( -        const VideoCommon::Shader::Image& entry); - -    std::size_t Hash() const { -        return static_cast<std::size_t>( -            Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); -    } - -    bool operator==(const SurfaceParams& rhs) const; - -    bool operator!=(const SurfaceParams& rhs) const { -        return !operator==(rhs); -    } - -    std::size_t GetGuestSizeInBytes() const { -        return GetInnerMemorySize(false, false, false); -    } - -    std::size_t GetHostSizeInBytes(bool is_converted) const { -        if (!is_converted) { -            return GetInnerMemorySize(true, false, false); -        } -        // ASTC is uncompressed in software, in emulated as RGBA8 -        std::size_t host_size_in_bytes = 0; -        for (u32 level = 0; level < num_levels; ++level) { -            host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); -        } -        return host_size_in_bytes; -    } - -    u32 GetBlockAlignedWidth() const { -        return Common::AlignUp(width, 64 / GetBytesPerPixel()); -    } - -    /// Returns the width of a given mipmap level. 
-    u32 GetMipWidth(u32 level) const { -        return std::max(1U, width >> level); -    } - -    /// Returns the height of a given mipmap level. -    u32 GetMipHeight(u32 level) const { -        return std::max(1U, height >> level); -    } - -    /// Returns the depth of a given mipmap level. -    u32 GetMipDepth(u32 level) const { -        return is_layered ? depth : std::max(1U, depth >> level); -    } - -    /// Returns the block height of a given mipmap level. -    u32 GetMipBlockHeight(u32 level) const; - -    /// Returns the block depth of a given mipmap level. -    u32 GetMipBlockDepth(u32 level) const; - -    /// Returns the best possible row/pitch alignment for the surface. -    u32 GetRowAlignment(u32 level, bool is_converted) const { -        const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); -        return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); -    } - -    /// Returns the offset in bytes in guest memory of a given mipmap level. -    std::size_t GetGuestMipmapLevelOffset(u32 level) const; - -    /// Returns the offset in bytes in host memory (linear) of a given mipmap level. -    std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; - -    /// Returns the size in bytes in guest memory of a given mipmap level. -    std::size_t GetGuestMipmapSize(u32 level) const { -        return GetInnerMipmapMemorySize(level, false, false); -    } - -    /// Returns the size in bytes in host memory (linear) of a given mipmap level. -    std::size_t GetHostMipmapSize(u32 level) const { -        return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); -    } - -    std::size_t GetConvertedMipmapSize(u32 level) const; - -    /// Get this texture Tegra Block size in guest memory layout -    u32 GetBlockSize() const; - -    /// Get X, Y coordinates max sizes of a single block. 
-    std::pair<u32, u32> GetBlockXY() const; - -    /// Get the offset in x, y, z coordinates from a memory offset -    std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; - -    /// Returns the size of a layer in bytes in guest memory. -    std::size_t GetGuestLayerSize() const { -        return GetLayerSize(false, false); -    } - -    /// Returns the size of a layer in bytes in host memory for a given mipmap level. -    std::size_t GetHostLayerSize(u32 level) const { -        ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); -        return GetInnerMipmapMemorySize(level, true, false); -    } - -    /// Returns the max possible mipmap that the texture can have in host gpu -    u32 MaxPossibleMipmap() const { -        const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; -        const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; -        const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); -        if (target != VideoCore::Surface::SurfaceTarget::Texture3D) -            return max_mipmap; -        return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); -    } - -    /// Returns if the guest surface is a compressed surface. -    bool IsCompressed() const { -        return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; -    } - -    /// Returns the default block width. -    u32 GetDefaultBlockWidth() const { -        return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); -    } - -    /// Returns the default block height. -    u32 GetDefaultBlockHeight() const { -        return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); -    } - -    /// Returns the bits per pixel. -    u32 GetBitsPerPixel() const { -        return VideoCore::Surface::GetFormatBpp(pixel_format); -    } - -    /// Returns the bytes per pixel. 
-    u32 GetBytesPerPixel() const { -        return VideoCore::Surface::GetBytesPerPixel(pixel_format); -    } - -    /// Returns true if the pixel format is a depth and/or stencil format. -    bool IsPixelFormatZeta() const { -        return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && -               pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; -    } - -    /// Returns is the surface is a TextureBuffer type of surface. -    bool IsBuffer() const { -        return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; -    } - -    /// Returns the number of layers in the surface. -    std::size_t GetNumLayers() const { -        return is_layered ? depth : 1; -    } - -    /// Returns the debug name of the texture for use in graphic debuggers. -    std::string TargetName() const; - -    // Helper used for out of class size calculations -    static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, -                                    const u32 block_depth) { -        return Common::AlignBits(out_size, -                                 Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); -    } - -    /// Converts a width from a type of surface into another. This helps represent the -    /// equivalent value between compressed/non-compressed textures. -    static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, -                            VideoCore::Surface::PixelFormat pixel_format_to) { -        const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); -        const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); -        return (width * bw2 + bw1 - 1) / bw1; -    } - -    /// Converts a height from a type of surface into another. This helps represent the -    /// equivalent value between compressed/non-compressed textures. 
-    static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, -                             VideoCore::Surface::PixelFormat pixel_format_to) { -        const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); -        const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); -        return (height * bh2 + bh1 - 1) / bh1; -    } - -    // Finds the maximun possible width between 2 2D layers of different formats -    static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, -                              const u32 src_level, const u32 dst_level) { -        const u32 bw1 = src_params.GetDefaultBlockWidth(); -        const u32 bw2 = dst_params.GetDefaultBlockWidth(); -        const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; -        const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; -        return std::min(t_src_width, t_dst_width); -    } - -    // Finds the maximun possible height between 2 2D layers of different formats -    static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, -                               const u32 src_level, const u32 dst_level) { -        const u32 bh1 = src_params.GetDefaultBlockHeight(); -        const u32 bh2 = dst_params.GetDefaultBlockHeight(); -        const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; -        const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; -        return std::min(t_src_height, t_dst_height); -    } - -    bool is_tiled; -    bool srgb_conversion; -    bool is_layered; -    u32 block_width; -    u32 block_height; -    u32 block_depth; -    u32 tile_width_spacing; -    u32 width; -    u32 height; -    u32 depth; -    u32 pitch; -    u32 num_levels; -    u32 emulated_levels; -    VideoCore::Surface::PixelFormat pixel_format; -    
VideoCore::Surface::SurfaceType type; -    VideoCore::Surface::SurfaceTarget target; - -private: -    /// Returns the size of a given mipmap level inside a layer. -    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; - -    /// Returns the size of all mipmap levels and aligns as needed. -    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { -        return GetLayerSize(as_host_size, uncompressed) * -               (layer_only ? 1U : (is_layered ? depth : 1U)); -    } - -    /// Returns the size of a layer -    std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - -    /// Returns true if these parameters are from a layered surface. -    bool IsLayered() const; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash<VideoCommon::SurfaceParams> { -    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { -        return k.Hash(); -    } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <tuple> - -#include "common/common_types.h" -#include "video_core/texture_cache/surface_view.h" - -namespace VideoCommon { - -std::size_t ViewParams::Hash() const { -    return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ -           (static_cast<std::size_t>(base_level) << 24) ^ -           (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); -} - -bool ViewParams::operator==(const ViewParams& rhs) const { -    return std::tie(base_layer, num_layers, base_level, num_levels, target) == -           std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); -} - -bool ViewParams::operator!=(const ViewParams& rhs) const { -    return !operator==(rhs); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 199f72732..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <functional> - -#include "common/common_types.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -struct ViewParams { -    constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_, -                                  u32 num_layers_, u32 base_level_, u32 num_levels_) -        : target{target_}, base_layer{base_layer_}, num_layers{num_layers_}, -          base_level{base_level_}, num_levels{num_levels_} {} - -    std::size_t Hash() const; - -    bool operator==(const ViewParams& rhs) const; -    bool operator!=(const ViewParams& rhs) const; - -    bool IsLayered() const { -        switch (target) { -        case VideoCore::Surface::SurfaceTarget::Texture1DArray: -        case VideoCore::Surface::SurfaceTarget::Texture2DArray: -        case VideoCore::Surface::SurfaceTarget::TextureCubemap: -        case VideoCore::Surface::SurfaceTarget::TextureCubeArray: -            return true; -        default: -            return false; -        } -    } - -    VideoCore::Surface::SurfaceTarget target{}; -    u32 base_layer{}; -    u32 num_layers{}; -    u32 base_level{}; -    u32 num_levels{}; -}; - -class ViewBase { -public: -    constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {} - -    constexpr const ViewParams& GetViewParams() const { -        return params; -    } - -protected: -    ViewParams params; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash<VideoCommon::ViewParams> { -    std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { -        return k.Hash(); -    } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 581d8dd5b..968059842 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,1298 +6,1449 @@  #include 
<algorithm>  #include <array> -#include <list> +#include <bit>  #include <memory>  #include <mutex> -#include <set> -#include <tuple> +#include <optional> +#include <span> +#include <type_traits>  #include <unordered_map> +#include <utility>  #include <vector>  #include <boost/container/small_vector.hpp> -#include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range.hpp> -#include "common/assert.h" +#include "common/alignment.h" +#include "common/common_funcs.h"  #include "common/common_types.h" -#include "common/math_util.h" -#include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" +#include "common/logging/log.h"  #include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h"  #include "video_core/dirty_flags.h"  #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h"  #include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h"  #include "video_core/memory_manager.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/surface.h" -#include "video_core/texture_cache/copy_params.h" +#include "video_core/texture_cache/descriptor_table.h"  #include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra::Texture { -struct FullTextureInfo; -} - -namespace VideoCore { -class RasterizerInterface; -} +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include 
"video_core/texture_cache/util.h" +#include "video_core/textures/texture.h"  namespace VideoCommon { -using VideoCore::Surface::FormatCompatibility; +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible;  using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; -using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; -template <typename TSurface, typename TView> +template <class P>  class TextureCache { -    using VectorSurface = boost::container::small_vector<TSurface, 1>; +    /// Address shift for caching images into a hash table +    static constexpr u64 PAGE_SHIFT = 20; + +    /// Enables debugging features to the texture cache +    static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; +    /// Implement blits as copies between framebuffers +    static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; +    /// True when some copies have to be emulated +    static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + +    /// Image view ID for null descriptors +    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; +    /// Sampler ID for bugged sampler ids +    static constexpr SamplerId NULL_SAMPLER_ID{0}; + +    using Runtime = typename P::Runtime; +    using Image = typename P::Image; +    using ImageAlloc = typename P::ImageAlloc; +    using ImageView = typename P::ImageView; +    using Sampler = typename P::Sampler; +    using Framebuffer = typename P::Framebuffer; + +    struct BlitImages { +        ImageId dst_id; +        ImageId src_id; +        PixelFormat dst_format; +        PixelFormat src_format; +    }; + +    template <typename T> +    struct 
IdentityHash { +        [[nodiscard]] size_t operator()(T value) const noexcept { +            return static_cast<size_t>(value); +        } +    };  public: -    void InvalidateRegion(VAddr addr, std::size_t size) { -        std::lock_guard lock{mutex}; +    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, +                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); -        for (const auto& surface : GetSurfacesInRegion(addr, size)) { -            Unregister(surface); -        } -    } +    /// Notify the cache that a new frame has been queued +    void TickFrame(); -    void OnCPUWrite(VAddr addr, std::size_t size) { -        std::lock_guard lock{mutex}; +    /// Return an unique mutually exclusive lock for the cache +    [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); -        for (const auto& surface : GetSurfacesInRegion(addr, size)) { -            if (surface->IsMemoryMarked()) { -                UnmarkMemory(surface); -                surface->SetSyncPending(true); -                marked_for_unregister.emplace_back(surface); -            } -        } -    } +    /// Return a constant reference to the given image view id +    [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; -    void SyncGuestHost() { -        std::lock_guard lock{mutex}; +    /// Return a reference to the given image view id +    [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; -        for (const auto& surface : marked_for_unregister) { -            if (surface->IsRegistered()) { -                surface->SetSyncPending(false); -                Unregister(surface); -            } -        } -        marked_for_unregister.clear(); -    } +    /// Fill image_view_ids with the graphics images in indices +    void FillGraphicsImageViews(std::span<const u32> indices, +                                std::span<ImageViewId> image_view_ids); -    /** -     * Guarantees that 
rendertargets don't unregister themselves if the -     * collide. Protection is currently only done on 3D slices. -     */ -    void GuardRenderTargets(bool new_guard) { -        guard_render_targets = new_guard; -    } +    /// Fill image_view_ids with the compute images in indices +    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); -    void GuardSamplers(bool new_guard) { -        guard_samplers = new_guard; -    } +    /// Get the sampler from the graphics descriptor table in the specified index +    Sampler* GetGraphicsSampler(u32 index); -    void FlushRegion(VAddr addr, std::size_t size) { -        std::lock_guard lock{mutex}; +    /// Get the sampler from the compute descriptor table in the specified index +    Sampler* GetComputeSampler(u32 index); -        auto surfaces = GetSurfacesInRegion(addr, size); -        if (surfaces.empty()) { -            return; -        } -        std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { -            return a->GetModificationTick() < b->GetModificationTick(); -        }); -        for (const auto& surface : surfaces) { -            mutex.unlock(); -            FlushSurface(surface); -            mutex.lock(); -        } -    } +    /// Refresh the state for graphics image view and sampler descriptors +    void SynchronizeGraphicsDescriptors(); -    bool MustFlushRegion(VAddr addr, std::size_t size) { -        std::lock_guard lock{mutex}; +    /// Refresh the state for compute image view and sampler descriptors +    void SynchronizeComputeDescriptors(); -        const auto surfaces = GetSurfacesInRegion(addr, size); -        return std::any_of(surfaces.cbegin(), surfaces.cend(), -                           [](const TSurface& surface) { return surface->IsModified(); }); -    } +    /// Update bound render targets and upload memory if necessary +    /// @param is_clear True when the render targets are being used for clears +    void 
UpdateRenderTargets(bool is_clear); -    TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, -                            const VideoCommon::Shader::Sampler& entry) { -        std::lock_guard lock{mutex}; -        const auto gpu_addr{tic.Address()}; -        if (!gpu_addr) { -            return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); -        } +    /// Find a framebuffer with the currently bound render targets +    /// UpdateRenderTargets should be called before this +    Framebuffer* GetFramebuffer(); -        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); -        if (!cpu_addr) { -            return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); -        } +    /// Mark images in a range as modified from the CPU +    void WriteMemory(VAddr cpu_addr, size_t size); -        if (!IsTypeCompatible(tic.texture_type, entry)) { -            return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); -        } +    /// Download contents of host images to guest memory in a region +    void DownloadMemory(VAddr cpu_addr, size_t size); -        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; -        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); -        if (guard_samplers) { -            sampled_textures.push_back(surface); -        } -        return view; -    } +    /// Remove images in a region +    void UnmapMemory(VAddr cpu_addr, size_t size); -    TView GetImageSurface(const Tegra::Texture::TICEntry& tic, -                          const VideoCommon::Shader::Image& entry) { -        std::lock_guard lock{mutex}; -        const auto gpu_addr{tic.Address()}; -        if (!gpu_addr) { -            return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); -        } -        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); -        if (!cpu_addr) { -            return 
GetNullSurface(SurfaceParams::ExpectedTarget(entry)); -        } -        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; -        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); -        if (guard_samplers) { -            sampled_textures.push_back(surface); -        } -        return view; -    } +    /// Blit an image with the given parameters +    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, +                   const Tegra::Engines::Fermi2D::Surface& src, +                   const Tegra::Engines::Fermi2D::Config& copy); -    bool TextureBarrier() { -        const bool any_rt = -            std::any_of(sampled_textures.begin(), sampled_textures.end(), -                        [](const auto& surface) { return surface->IsRenderTarget(); }); -        sampled_textures.clear(); -        return any_rt; -    } +    /// Invalidate the contents of the color buffer index +    /// These contents become unspecified, the cache can assume aggressive optimizations. +    void InvalidateColorBuffer(size_t index); -    TView GetDepthBufferSurface(bool preserve_contents) { -        std::lock_guard lock{mutex}; -        auto& dirty = maxwell3d.dirty; -        if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { -            return depth_buffer.view; -        } -        dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; +    /// Invalidate the contents of the depth buffer +    /// These contents become unspecified, the cache can assume aggressive optimizations. 
+    void InvalidateDepthBuffer(); -        const auto& regs{maxwell3d.regs}; -        const auto gpu_addr{regs.zeta.Address()}; -        if (!gpu_addr || !regs.zeta_enable) { -            SetEmptyDepthBuffer(); -            return {}; -        } -        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); -        if (!cpu_addr) { -            SetEmptyDepthBuffer(); -            return {}; -        } -        const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; -        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); -        if (depth_buffer.target) -            depth_buffer.target->MarkAsRenderTarget(false, NO_RT); -        depth_buffer.target = surface_view.first; -        depth_buffer.view = surface_view.second; -        if (depth_buffer.target) -            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); -        return surface_view.second; -    } - -    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { -        std::lock_guard lock{mutex}; -        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); -        if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { -            return render_targets[index].view; -        } -        maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; +    /// Try to find a cached image view in the given CPU address +    [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); -        const auto& regs{maxwell3d.regs}; -        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || -            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { -            SetEmptyColorBuffer(index); -            return {}; -        } +    /// Return true when there are uncommitted images to be downloaded +    [[nodiscard]] bool HasUncommittedFlushes() const noexcept; -        const auto& config{regs.rt[index]}; -        const auto 
gpu_addr{config.Address()}; -        if (!gpu_addr) { -            SetEmptyColorBuffer(index); -            return {}; -        } +    /// Return true when the caller should wait for async downloads +    [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; -        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); -        if (!cpu_addr) { -            SetEmptyColorBuffer(index); -            return {}; -        } +    /// Commit asynchronous downloads +    void CommitAsyncFlushes(); + +    /// Pop asynchronous downloads +    void PopAsyncFlushes(); + +    /// Return true when a CPU region is modified from the GPU +    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); -        auto surface_view = -            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), -                       preserve_contents, true); -        if (render_targets[index].target) { -            auto& surface = render_targets[index].target; -            surface->MarkAsRenderTarget(false, NO_RT); -            const auto& cr_params = surface->GetSurfaceParams(); -            if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { -                AsyncFlushSurface(surface); +private: +    /// Iterate over all page indices in a range +    template <typename Func> +    static void ForEachPage(VAddr addr, size_t size, Func&& func) { +        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; +        const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; +        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { +            if constexpr (RETURNS_BOOL) { +                if (func(page)) { +                    break; +                } +            } else { +                func(page);              }          } -        render_targets[index].target = surface_view.first; -        render_targets[index].view = surface_view.second; -       
 if (render_targets[index].target) -            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); -        return surface_view.second;      } -    void MarkColorBufferInUse(std::size_t index) { -        if (auto& render_target = render_targets[index].target) { -            render_target->MarkAsModified(true, Tick()); -        } -    } +    /// Fills image_view_ids in the image views in indices +    void FillImageViews(DescriptorTable<TICEntry>& table, +                        std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, +                        std::span<ImageViewId> image_view_ids); -    void MarkDepthBufferInUse() { -        if (depth_buffer.target) { -            depth_buffer.target->MarkAsModified(true, Tick()); -        } -    } +    /// Find or create an image view in the guest descriptor table +    ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, +                               std::span<ImageViewId> cached_image_view_ids, u32 index); -    void SetEmptyDepthBuffer() { -        if (depth_buffer.target == nullptr) { -            return; -        } -        depth_buffer.target->MarkAsRenderTarget(false, NO_RT); -        depth_buffer.target = nullptr; -        depth_buffer.view = nullptr; -    } +    /// Find or create a framebuffer with the given render target parameters +    FramebufferId GetFramebufferId(const RenderTargets& key); -    void SetEmptyColorBuffer(std::size_t index) { -        if (render_targets[index].target == nullptr) { -            return; -        } -        render_targets[index].target->MarkAsRenderTarget(false, NO_RT); -        render_targets[index].target = nullptr; -        render_targets[index].view = nullptr; -    } - -    void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, -                     const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, -                     const Tegra::Engines::Fermi2D::Config& copy_config) { -        
std::lock_guard lock{mutex}; -        SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); -        SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); -        const GPUVAddr src_gpu_addr = src_config.Address(); -        const GPUVAddr dst_gpu_addr = dst_config.Address(); -        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - -        const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); -        const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); -        std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); -        TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; -        ImageBlit(src_surface, dst_surface.second, copy_config); -        dst_surface.first->MarkAsModified(true, Tick()); -    } - -    TSurface TryFindFramebufferSurface(VAddr addr) const { -        if (!addr) { -            return nullptr; -        } -        const VAddr page = addr >> registry_page_bits; -        const auto it = registry.find(page); -        if (it == registry.end()) { -            return nullptr; -        } -        const auto& list = it->second; -        const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { -            return surface->GetCpuAddr() == addr; -        }); -        return found != list.end() ? 
*found : nullptr; -    } +    /// Refresh the contents (pixel data) of an image +    void RefreshContents(Image& image); -    u64 Tick() { -        return ++ticks; -    } +    /// Upload data from guest to an image +    template <typename MapBuffer> +    void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); -    void CommitAsyncFlushes() { -        committed_flushes.push_back(uncommitted_flushes); -        uncommitted_flushes.reset(); -    } +    /// Find or create an image view from a guest descriptor +    [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); -    bool HasUncommittedFlushes() const { -        return uncommitted_flushes != nullptr; -    } +    /// Create a new image view from a guest descriptor +    [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); -    bool ShouldWaitAsyncFlushes() const { -        return !committed_flushes.empty() && committed_flushes.front() != nullptr; -    } +    /// Find or create an image from the given parameters +    [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, +                                            RelaxedOptions options = RelaxedOptions{}); -    void PopAsyncFlushes() { -        if (committed_flushes.empty()) { -            return; -        } -        auto& flush_list = committed_flushes.front(); -        if (!flush_list) { -            committed_flushes.pop_front(); -            return; -        } -        for (TSurface& surface : *flush_list) { -            FlushSurface(surface); -        } -        committed_flushes.pop_front(); -    } +    /// Find an image from the given parameters +    [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, +                                    RelaxedOptions options); -protected: -    explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, -                          Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, -                     
     bool is_astc_supported_) -        : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, -          gpu_memory{gpu_memory_} { -        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { -            SetEmptyColorBuffer(i); -        } +    /// Create an image from the given parameters +    [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, +                                      RelaxedOptions options); -        SetEmptyDepthBuffer(); -        staging_cache.SetSize(2); +    /// Create a new image and join perfectly matching existing images +    /// Remove joined images from the cache +    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); -        const auto make_siblings = [this](PixelFormat a, PixelFormat b) { -            siblings_table[static_cast<std::size_t>(a)] = b; -            siblings_table[static_cast<std::size_t>(b)] = a; -        }; -        std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); -        make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); -        make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); -        make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); +    /// Return a blit image pair from the given guest blit parameters +    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, +                                           const Tegra::Engines::Fermi2D::Surface& src); -        sampled_textures.reserve(64); -    } +    /// Find or create a sampler from a guest descriptor sampler +    [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); -    ~TextureCache() = default; +    /// Find or create an image view for the given color buffer index +    [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); -    virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& 
params) = 0; +    /// Find or create an image view for the depth buffer +    [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); -    virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, -                           const CopyParams& copy_params) = 0; +    /// Find or create a view for a render target with the given image parameters +    [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, +                                                   bool is_clear); -    virtual void ImageBlit(TView& src_view, TView& dst_view, -                           const Tegra::Engines::Fermi2D::Config& copy_config) = 0; +    /// Iterates over all the images in a region calling func +    template <typename Func> +    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); -    // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture -    // and reading it from a separate buffer. -    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; +    /// Find or create an image view in the given image with the passed parameters +    [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); -    void ManageRenderTargetUnregister(TSurface& surface) { -        auto& dirty = maxwell3d.dirty; -        const u32 index = surface->GetRenderTarget(); -        if (index == DEPTH_RT) { -            dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; -        } else { -            dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; -        } -        dirty.flags[VideoCommon::Dirty::RenderTargets] = true; +    /// Register image in the page table +    void RegisterImage(ImageId image); + +    /// Unregister image from the page table +    void UnregisterImage(ImageId image); + +    /// Track CPU reads and writes for image +    void TrackImage(ImageBase& image); + +    /// Stop tracking CPU reads and writes for image +    void 
UntrackImage(ImageBase& image); + +    /// Delete image from the cache +    void DeleteImage(ImageId image); + +    /// Remove image views references from the cache +    void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); + +    /// Remove framebuffers using the given image views from the cache +    void RemoveFramebuffers(std::span<const ImageViewId> removed_views); + +    /// Mark an image as modified from the GPU +    void MarkModification(ImageBase& image) noexcept; + +    /// Synchronize image aliases, copying data if needed +    void SynchronizeAliases(ImageId image_id); + +    /// Prepare an image to be used +    void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + +    /// Prepare an image view to be used +    void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + +    /// Execute copies from one image to the other, even if they are incompatible +    void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); + +    /// Bind an image view as render target, downloading resources preemtively if needed +    void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + +    /// Create a render target from a given image and image view parameters +    [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( +        ImageId, const ImageViewInfo& view_info); + +    /// Returns true if the current clear parameters clear the whole image of a given image view +    [[nodiscard]] bool IsFullClear(ImageViewId id); + +    Runtime& runtime; +    VideoCore::RasterizerInterface& rasterizer; +    Tegra::Engines::Maxwell3D& maxwell3d; +    Tegra::Engines::KeplerCompute& kepler_compute; +    Tegra::MemoryManager& gpu_memory; + +    DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; +    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; +    std::vector<SamplerId> graphics_sampler_ids; +    std::vector<ImageViewId> 
graphics_image_view_ids; + +    DescriptorTable<TICEntry> compute_image_table{gpu_memory}; +    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; +    std::vector<SamplerId> compute_sampler_ids; +    std::vector<ImageViewId> compute_image_view_ids; + +    RenderTargets render_targets; + +    std::mutex mutex; + +    std::unordered_map<TICEntry, ImageViewId> image_views; +    std::unordered_map<TSCEntry, SamplerId> samplers; +    std::unordered_map<RenderTargets, FramebufferId> framebuffers; + +    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; + +    bool has_deleted_images = false; + +    SlotVector<Image> slot_images; +    SlotVector<ImageView> slot_image_views; +    SlotVector<ImageAlloc> slot_image_allocs; +    SlotVector<Sampler> slot_samplers; +    SlotVector<Framebuffer> slot_framebuffers; + +    // TODO: This data structure is not optimal and it should be reworked +    std::vector<ImageId> uncommitted_downloads; +    std::queue<std::vector<ImageId>> committed_downloads; + +    static constexpr size_t TICKS_TO_DESTROY = 6; +    DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; +    DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; +    DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; + +    std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; + +    u64 modification_tick = 0; +    u64 frame_tick = 0; +}; + +template <class P> +TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, +                              Tegra::Engines::Maxwell3D& maxwell3d_, +                              Tegra::Engines::KeplerCompute& kepler_compute_, +                              Tegra::MemoryManager& gpu_memory_) +    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, +      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { +    // Configure null sampler +    TSCEntry sampler_descriptor{}; +    
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); +    sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); +    sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); +    sampler_descriptor.cubemap_anisotropy.Assign(1); + +    // Make sure the first index is reserved for the null resources +    // This way the null resource becomes a compile time constant +    void(slot_image_views.insert(runtime, NullImageParams{})); +    void(slot_samplers.insert(runtime, sampler_descriptor)); +} + +template <class P> +void TextureCache<P>::TickFrame() { +    // Tick sentenced resources in this order to ensure they are destroyed in the right order +    sentenced_images.Tick(); +    sentenced_framebuffers.Tick(); +    sentenced_image_view.Tick(); +    ++frame_tick; +} + +template <class P> +std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { +    return std::unique_lock{mutex}; +} + +template <class P> +const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { +    return slot_image_views[id]; +} + +template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { +    return slot_image_views[id]; +} + +template <class P> +void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, +                                             std::span<ImageViewId> image_view_ids) { +    FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template <class P> +void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, +                                            std::span<ImageViewId> image_view_ids) { +    FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template <class P> +typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { +    [[unlikely]] if (index > graphics_sampler_table.Limit()) { +        
LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); +        return &slot_samplers[NULL_SAMPLER_ID]; +    } +    const auto [descriptor, is_new] = graphics_sampler_table.Read(index); +    SamplerId& id = graphics_sampler_ids[index]; +    [[unlikely]] if (is_new) { +        id = FindSampler(descriptor);      } +    return &slot_samplers[id]; +} -    void Register(TSurface surface) { -        const GPUVAddr gpu_addr = surface->GetGpuAddr(); -        const std::size_t size = surface->GetSizeInBytes(); -        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); -        if (!cpu_addr) { -            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", -                         gpu_addr); -            return; -        } -        surface->SetCpuAddr(*cpu_addr); -        RegisterInnerCache(surface); -        surface->MarkAsRegistered(true); -        surface->SetMemoryMarked(true); -        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); +template <class P> +typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { +    [[unlikely]] if (index > compute_sampler_table.Limit()) { +        LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); +        return &slot_samplers[NULL_SAMPLER_ID]; +    } +    const auto [descriptor, is_new] = compute_sampler_table.Read(index); +    SamplerId& id = compute_sampler_ids[index]; +    [[unlikely]] if (is_new) { +        id = FindSampler(descriptor);      } +    return &slot_samplers[id]; +} -    void UnmarkMemory(TSurface surface) { -        if (!surface->IsMemoryMarked()) { -            return; -        } -        const std::size_t size = surface->GetSizeInBytes(); -        const VAddr cpu_addr = surface->GetCpuAddr(); -        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); -        surface->SetMemoryMarked(false); +template <class P> +void TextureCache<P>::SynchronizeGraphicsDescriptors() { +    using SamplerIndex = 
Tegra::Engines::Maxwell3D::Regs::SamplerIndex; +    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; +    const u32 tic_limit = maxwell3d.regs.tic.limit; +    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; +    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { +        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);      } +    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { +        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); +    } +} -    void Unregister(TSurface surface) { -        if (guard_render_targets && surface->IsProtected()) { -            return; -        } -        if (!guard_render_targets && surface->IsRenderTarget()) { -            ManageRenderTargetUnregister(surface); -        } -        UnmarkMemory(surface); -        if (surface->IsSyncPending()) { -            marked_for_unregister.remove(surface); -            surface->SetSyncPending(false); -        } -        UnregisterInnerCache(surface); -        surface->MarkAsRegistered(false); -        ReserveSurface(surface->GetSurfaceParams(), surface); +template <class P> +void TextureCache<P>::SynchronizeComputeDescriptors() { +    const bool linked_tsc = kepler_compute.launch_description.linked_tsc; +    const u32 tic_limit = kepler_compute.regs.tic.limit; +    const u32 tsc_limit = linked_tsc ? 
tic_limit : kepler_compute.regs.tsc.limit; +    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); +    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { +        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);      } +    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { +        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); +    } +} -    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { -        if (const auto surface = TryGetReservedSurface(params); surface) { -            surface->SetGpuAddr(gpu_addr); -            return surface; -        } -        // No reserved surface available, create a new one and reserve it -        auto new_surface{CreateSurface(gpu_addr, params)}; -        return new_surface; +template <class P> +void TextureCache<P>::UpdateRenderTargets(bool is_clear) { +    using namespace VideoCommon::Dirty; +    auto& flags = maxwell3d.dirty.flags; +    if (!flags[Dirty::RenderTargets]) { +        return;      } +    flags[Dirty::RenderTargets] = false; -    const bool is_astc_supported; +    // Render target control is used on all render targets, so force look ups when this one is up +    const bool force = flags[Dirty::RenderTargetControl]; +    flags[Dirty::RenderTargetControl] = false; -private: -    enum class RecycleStrategy : u32 { -        Ignore = 0, -        Flush = 1, -        BufferCopy = 3, -    }; +    for (size_t index = 0; index < NUM_RT; ++index) { +        ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; +        if (flags[Dirty::ColorBuffer0 + index] || force) { +            flags[Dirty::ColorBuffer0 + index] = false; +            BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); +        } +        PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); +    } +    if (flags[Dirty::ZetaBuffer] || force) { +        flags[Dirty::ZetaBuffer] = 
false; +        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); +    } +    const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; +    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); -    enum class DeductionType : u32 { -        DeductionComplete, -        DeductionIncomplete, -        DeductionFailed, +    for (size_t index = 0; index < NUM_RT; ++index) { +        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); +    } +    render_targets.size = Extent2D{ +        maxwell3d.regs.render_area.width, +        maxwell3d.regs.render_area.height,      }; +} -    struct Deduction { -        DeductionType type{DeductionType::DeductionFailed}; -        TSurface surface{}; +template <class P> +typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { +    return &slot_framebuffers[GetFramebufferId(render_targets)]; +} -        bool Failed() const { -            return type == DeductionType::DeductionFailed; -        } +template <class P> +void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, +                                     std::span<ImageViewId> cached_image_view_ids, +                                     std::span<const u32> indices, +                                     std::span<ImageViewId> image_view_ids) { +    ASSERT(indices.size() <= image_view_ids.size()); +    do { +        has_deleted_images = false; +        std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { +            return VisitImageView(table, cached_image_view_ids, index); +        }); +    } while (has_deleted_images); +} -        bool Incomplete() const { -            return type == DeductionType::DeductionIncomplete; -        } +template <class P> +ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, +                                            std::span<ImageViewId> cached_image_view_ids, +                    
                        u32 index) { +    if (index > table.Limit()) { +        LOG_ERROR(HW_GPU, "Invalid image view index={}", index); +        return NULL_IMAGE_VIEW_ID; +    } +    const auto [descriptor, is_new] = table.Read(index); +    ImageViewId& image_view_id = cached_image_view_ids[index]; +    if (is_new) { +        image_view_id = FindImageView(descriptor); +    } +    if (image_view_id != NULL_IMAGE_VIEW_ID) { +        PrepareImageView(image_view_id, false, false); +    } +    return image_view_id; +} -        bool IsDepth() const { -            return surface->GetSurfaceParams().IsPixelFormatZeta(); -        } -    }; +template <class P> +FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { +    const auto [pair, is_new] = framebuffers.try_emplace(key); +    FramebufferId& framebuffer_id = pair->second; +    if (!is_new) { +        return framebuffer_id; +    } +    std::array<ImageView*, NUM_RT> color_buffers; +    std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), +                           [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); +    ImageView* const depth_buffer = +        key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; +    framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); +    return framebuffer_id; +} -    /** -     * Takes care of selecting a proper strategy to deal with a texture recycle. -     * -     * @param overlaps      The overlapping surfaces registered in the cache. -     * @param params        The parameters on the new surface. -     * @param gpu_addr      The starting address of the new surface. -     * @param untopological Indicates to the recycler that the texture has no way -     *                      to match the overlaps due to topological reasons. 
-     **/ -    RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, -                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { -        if (Settings::IsGPULevelExtreme()) { -            return RecycleStrategy::Flush; -        } -        // 3D Textures decision -        if (params.target == SurfaceTarget::Texture3D) { -            return RecycleStrategy::Flush; -        } -        for (const auto& s : overlaps) { -            const auto& s_params = s->GetSurfaceParams(); -            if (s_params.target == SurfaceTarget::Texture3D) { -                return RecycleStrategy::Flush; -            } -        } -        // Untopological decision -        if (untopological == MatchTopologyResult::CompressUnmatch) { -            return RecycleStrategy::Flush; -        } -        if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { -            return RecycleStrategy::Flush; -        } -        return RecycleStrategy::Ignore; -    } - -    /** -     * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented -     * strategies: Ignore and Flush. -     * -     * - Ignore: Just unregisters all the overlaps and loads the new texture. -     * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. -     * -     * @param overlaps          The overlapping surfaces registered in the cache. -     * @param params            The parameters for the new surface. -     * @param gpu_addr          The starting address of the new surface. -     * @param preserve_contents Indicates that the new surface should be loaded from memory or left -     *                          blank. -     * @param untopological     Indicates to the recycler that the texture has no way to match the -     *                          overlaps due to topological reasons. 
-     **/ -    std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, -                                              const GPUVAddr gpu_addr, const bool preserve_contents, -                                              const MatchTopologyResult untopological) { -        const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); -        for (auto& surface : overlaps) { -            Unregister(surface); -        } -        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { -        case RecycleStrategy::Ignore: { -            return InitializeSurface(gpu_addr, params, do_load); -        } -        case RecycleStrategy::Flush: { -            std::sort(overlaps.begin(), overlaps.end(), -                      [](const TSurface& a, const TSurface& b) -> bool { -                          return a->GetModificationTick() < b->GetModificationTick(); -                      }); -            for (auto& surface : overlaps) { -                FlushSurface(surface); -            } -            return InitializeSurface(gpu_addr, params, preserve_contents); +template <class P> +void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { +    ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { +        if (True(image.flags & ImageFlagBits::CpuModified)) { +            return;          } -        case RecycleStrategy::BufferCopy: { -            auto new_surface = GetUncachedSurface(gpu_addr, params); -            BufferCopy(overlaps[0], new_surface); -            return {new_surface, new_surface->GetMainView()}; +        image.flags |= ImageFlagBits::CpuModified; +        UntrackImage(image); +    }); +} + +template <class P> +void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { +    std::vector<ImageId> images; +    ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { +        // Skip images that were not modified from the GPU +   
     if (False(image.flags & ImageFlagBits::GpuModified)) { +            return;          } -        default: { -            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); -            return InitializeSurface(gpu_addr, params, do_load); +        // Skip images that .are. modified from the CPU +        // We don't want to write sensitive data from the guest +        if (True(image.flags & ImageFlagBits::CpuModified)) { +            return;          } +        if (image.info.num_samples > 1) { +            LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); +            return;          } +        image.flags &= ~ImageFlagBits::GpuModified; +        images.push_back(image_id); +    }); +    if (images.empty()) { +        return; +    } +    std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { +        return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; +    }); +    for (const ImageId image_id : images) { +        Image& image = slot_images[image_id]; +        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); +        const auto copies = FullDownloadCopies(image.info); +        image.DownloadMemory(map, 0, copies); +        runtime.Finish(); +        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());      } +} -    /** -     * Takes a single surface and recreates into another that may differ in -     * format, target or width alignment. -     * -     * @param current_surface The registered surface in the cache which we want to convert. -     * @param params          The new surface params which we'll use to recreate the surface. -     * @param is_render       Whether or not the surface is a render target. 
-     **/ -    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, -                                              bool is_render) { -        const auto gpu_addr = current_surface->GetGpuAddr(); -        const auto& cr_params = current_surface->GetSurfaceParams(); -        TSurface new_surface; -        if (cr_params.pixel_format != params.pixel_format && !is_render && -            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { -            SurfaceParams new_params = params; -            new_params.pixel_format = cr_params.pixel_format; -            new_params.type = cr_params.type; -            new_surface = GetUncachedSurface(gpu_addr, new_params); -        } else { -            new_surface = GetUncachedSurface(gpu_addr, params); -        } -        const SurfaceParams& final_params = new_surface->GetSurfaceParams(); -        if (cr_params.type != final_params.type) { -            if (Settings::IsGPULevelExtreme()) { -                BufferCopy(current_surface, new_surface); -            } -        } else { -            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); -            for (auto& brick : bricks) { -                TryCopyImage(current_surface, new_surface, brick); -            } -        } -        Unregister(current_surface); -        Register(new_surface); -        new_surface->MarkAsModified(current_surface->IsModified(), Tick()); -        return {new_surface, new_surface->GetMainView()}; -    } - -    /** -     * Takes a single surface and checks with the new surface's params if it's an exact -     * match, we return the main view of the registered surface. If its formats don't -     * match, we rebuild the surface. We call this last method a `Mirage`. If formats -     * match but the targets don't, we create an overview View of the registered surface. -     * -     * @param current_surface The registered surface in the cache which we want to convert. 
-     * @param params          The new surface params which we want to check. -     * @param is_render       Whether or not the surface is a render target. -     **/ -    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, -                                                     const SurfaceParams& params, bool is_render) { -        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); -        const bool matches_target = current_surface->MatchTarget(params.target); -        const auto match_check = [&]() -> std::pair<TSurface, TView> { -            if (matches_target) { -                return {current_surface, current_surface->GetMainView()}; -            } -            return {current_surface, current_surface->EmplaceOverview(params)}; -        }; -        if (!is_mirage) { -            return match_check(); -        } -        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { -            return match_check(); -        } -        return RebuildSurface(current_surface, params, is_render); -    } - -    /** -     * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate -     * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps -     * of the new surface, if they all match we end up recreating a surface for them, -     * else we return nothing. -     * -     * @param overlaps The overlapping surfaces registered in the cache. -     * @param params   The parameters on the new surface. -     * @param gpu_addr The starting address of the new surface. 
-     **/ -    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, -                                                                    const SurfaceParams& params, -                                                                    GPUVAddr gpu_addr) { -        if (params.target == SurfaceTarget::Texture3D) { -            return std::nullopt; -        } -        const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; -        TSurface new_surface = GetUncachedSurface(gpu_addr, params); +template <class P> +void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { +    std::vector<ImageId> deleted_images; +    ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); +    for (const ImageId id : deleted_images) { +        Image& image = slot_images[id]; +        if (True(image.flags & ImageFlagBits::Tracked)) { +            UntrackImage(image); +        } +        UnregisterImage(id); +        DeleteImage(id); +    } +} -        if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { -            LoadSurface(new_surface); -            for (const auto& surface : overlaps) { -                Unregister(surface); -            } -            Register(new_surface); -            return {{new_surface, new_surface->GetMainView()}}; -        } +template <class P> +void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, +                                const Tegra::Engines::Fermi2D::Surface& src, +                                const Tegra::Engines::Fermi2D::Config& copy) { +    const BlitImages images = GetBlitImages(dst, src); +    const ImageId dst_id = images.dst_id; +    const ImageId src_id = images.src_id; +    PrepareImage(src_id, false, false); +    PrepareImage(dst_id, true, false); + +    ImageBase& dst_image = slot_images[dst_id]; +    const ImageBase& src_image = slot_images[src_id]; + +    // TODO: Deduplicate +    
const std::optional dst_base = dst_image.TryFindBase(dst.Address()); +    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; +    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); +    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); +    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); +    const std::array src_region{ +        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, +        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, +    }; -        std::size_t passed_tests = 0; -        for (auto& surface : overlaps) { -            const SurfaceParams& src_params = surface->GetSurfaceParams(); -            const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; -            if (!mipmap_layer) { -                continue; -            } -            const auto [base_layer, base_mipmap] = *mipmap_layer; -            if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { -                continue; -            } -            ++passed_tests; - -            // Copy all mipmaps and layers -            const u32 block_width = params.GetDefaultBlockWidth(); -            const u32 block_height = params.GetDefaultBlockHeight(); -            for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { -                const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); -                const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); -                if (width < block_width || height < block_height) { -                    // Current APIs forbid copying small compressed textures, avoid errors -                    break; -                } -                const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, -          
                                   src_params.depth); -                TryCopyImage(surface, new_surface, copy_params); -            } -        } -        if (passed_tests == 0) { -            return std::nullopt; -        } -        if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { -            // In Accurate GPU all tests should pass, else we recycle -            return std::nullopt; -        } +    const std::optional src_base = src_image.TryFindBase(src.Address()); +    const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; +    const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); +    const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); +    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); +    const std::array dst_region{ +        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, +        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, +    }; -        const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); -        for (const auto& surface : overlaps) { -            Unregister(surface); -        } +    // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
+    Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; +    if constexpr (FRAMEBUFFER_BLITS) { +        // OpenGL blits from framebuffers, not images +        Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; +        runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, +                                copy.filter, copy.operation); +    } else { +        // Vulkan can blit images, but it lacks format reinterpretations +        // Provide a framebuffer in case it's necessary +        ImageView& dst_view = slot_image_views[dst_view_id]; +        ImageView& src_view = slot_image_views[src_view_id]; +        runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, +                          copy.operation); +    } +} -        new_surface->MarkAsModified(modified, Tick()); -        Register(new_surface); -        return {{new_surface, new_surface->GetMainView()}}; -    } - -    /** -     * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D -     * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of -     * the HLE methods. -     * -     * @param overlaps  The overlapping surfaces registered in the cache. -     * @param params    The parameters on the new surface. -     * @param gpu_addr  The starting address of the new surface. -     * @param cpu_addr  The starting address of the new surface on physical memory. -     * @param preserve_contents Indicates that the new surface should be loaded from memory or -     *                          left blank. 
-     */ -    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, -                                                               const SurfaceParams& params, -                                                               GPUVAddr gpu_addr, VAddr cpu_addr, -                                                               bool preserve_contents) { -        if (params.target != SurfaceTarget::Texture3D) { -            for (const auto& surface : overlaps) { -                if (!surface->MatchTarget(params.target)) { -                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { -                        if (Settings::IsGPULevelExtreme()) { -                            return std::nullopt; -                        } -                        Unregister(surface); -                        return InitializeSurface(gpu_addr, params, preserve_contents); -                    } -                    return std::nullopt; -                } -                if (surface->GetCpuAddr() != cpu_addr) { -                    continue; -                } -                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { -                    return std::make_pair(surface, surface->GetMainView()); -                } -            } -            return InitializeSurface(gpu_addr, params, preserve_contents); -        } +template <class P> +void TextureCache<P>::InvalidateColorBuffer(size_t index) { +    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; +    color_buffer_id = FindColorBuffer(index, false); +    if (!color_buffer_id) { +        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); +        return; +    } +    // When invalidating a color buffer, the old contents are no longer relevant +    ImageView& color_buffer = slot_image_views[color_buffer_id]; +    Image& image = slot_images[color_buffer.image_id]; +    image.flags &= ~ImageFlagBits::CpuModified; +    
image.flags &= ~ImageFlagBits::GpuModified; -        if (params.num_levels > 1) { -            // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach -            return std::nullopt; -        } +    runtime.InvalidateColorBuffer(color_buffer, index); +} -        if (overlaps.size() == 1) { -            const auto& surface = overlaps[0]; -            const SurfaceParams& overlap_params = surface->GetSurfaceParams(); -            // Don't attempt to render to textures with more than one level for now -            // The texture has to be to the right or the sample address if we want to render to it -            if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { -                const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); -                const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); -                if (slice < overlap_params.depth) { -                    auto view = surface->Emplace3DView(slice, params.depth, 0, 1); -                    return std::make_pair(std::move(surface), std::move(view)); -                } -            } -        } +template <class P> +void TextureCache<P>::InvalidateDepthBuffer() { +    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; +    depth_buffer_id = FindDepthBuffer(false); +    if (!depth_buffer_id) { +        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); +        return; +    } +    // When invalidating the depth buffer, the old contents are no longer relevant +    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; +    image.flags &= ~ImageFlagBits::CpuModified; +    image.flags &= ~ImageFlagBits::GpuModified; -        TSurface new_surface = GetUncachedSurface(gpu_addr, params); -        bool modified = false; +    ImageView& depth_buffer = slot_image_views[depth_buffer_id]; +    runtime.InvalidateDepthBuffer(depth_buffer); +} -        for (auto& surface : overlaps) { -            const 
SurfaceParams& src_params = surface->GetSurfaceParams(); -            if (src_params.target != SurfaceTarget::Texture2D || -                src_params.height != params.height || -                src_params.block_depth != params.block_depth || -                src_params.block_height != params.block_height) { -                return std::nullopt; -            } -            modified |= surface->IsModified(); - -            const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); -            const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); -            const u32 width = params.width; -            const u32 height = params.height; -            const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); -            TryCopyImage(surface, new_surface, copy_params); +template <class P> +typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { +    // TODO: Properly implement this +    const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); +    if (it == page_table.end()) { +        return nullptr; +    } +    const auto& image_ids = it->second; +    for (const ImageId image_id : image_ids) { +        const ImageBase& image = slot_images[image_id]; +        if (image.cpu_addr != cpu_addr) { +            continue;          } -        for (const auto& surface : overlaps) { -            Unregister(surface); +        if (image.image_view_ids.empty()) { +            continue;          } -        new_surface->MarkAsModified(modified, Tick()); -        Register(new_surface); - -        TView view = new_surface->GetMainView(); -        return std::make_pair(std::move(new_surface), std::move(view)); -    } - -    /** -     * Gets the starting address and parameters of a candidate surface and tries -     * to find a matching surface within the cache. This is done in 3 big steps: -     * -     * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. -     * -     * 2. 
Check if there are any overlaps at all, if there are none, we just load the texture from -     *    memory else we move to step 3. -     * -     * 3. Consists of figuring out the relationship between the candidate texture and the -     *    overlaps. We divide the scenarios depending if there's 1 or many overlaps. If -     *    there's many, we just try to reconstruct a new surface out of them based on the -     *    candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we -     *    have to check if the candidate is a view (layer/mipmap) of the overlap or if the -     *    registered surface is a mipmap/layer of the candidate. In this last case we reconstruct -     *    a new surface. -     * -     * @param gpu_addr          The starting address of the candidate surface. -     * @param params            The parameters on the candidate surface. -     * @param preserve_contents Indicates that the new surface should be loaded from memory or -     *                          left blank. -     * @param is_render         Whether or not the surface is a render target. -     **/ -    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, -                                          const SurfaceParams& params, bool preserve_contents, -                                          bool is_render) { -        // Step 1 -        // Check Level 1 Cache for a fast structural match. If candidate surface -        // matches at certain level we are pretty much done. 
-        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { -            TSurface& current_surface = iter->second; -            const auto topological_result = current_surface->MatchesTopology(params); -            if (topological_result != MatchTopologyResult::FullMatch) { -                VectorSurface overlaps{current_surface}; -                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, -                                      topological_result); -            } +        return &slot_image_views[image.image_view_ids.at(0)]; +    } +    return nullptr; +} -            const auto struct_result = current_surface->MatchesStructure(params); -            if (struct_result != MatchStructureResult::None) { -                const auto& old_params = current_surface->GetSurfaceParams(); -                const bool not_3d = params.target != SurfaceTarget::Texture3D && -                                    old_params.target != SurfaceTarget::Texture3D; -                if (not_3d || current_surface->MatchTarget(params.target)) { -                    if (struct_result == MatchStructureResult::FullMatch) { -                        return ManageStructuralMatch(current_surface, params, is_render); -                    } else { -                        return RebuildSurface(current_surface, params, is_render); -                    } -                } -            } -        } +template <class P> +bool TextureCache<P>::HasUncommittedFlushes() const noexcept { +    return !uncommitted_downloads.empty(); +} -        // Step 2 -        // Obtain all possible overlaps in the memory region -        const std::size_t candidate_size = params.GetGuestSizeInBytes(); -        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; +template <class P> +bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { +    return !committed_downloads.empty() && !committed_downloads.front().empty(); +} -        // If none are found, we are 
done. we just load the surface and create it. -        if (overlaps.empty()) { -            return InitializeSurface(gpu_addr, params, preserve_contents); -        } +template <class P> +void TextureCache<P>::CommitAsyncFlushes() { +    // This is intentionally passing the value by copy +    committed_downloads.push(uncommitted_downloads); +    uncommitted_downloads.clear(); +} -        // Step 3 -        // Now we need to figure the relationship between the texture and its overlaps -        // we do a topological test to ensure we can find some relationship. If it fails -        // immediately recycle the texture -        for (const auto& surface : overlaps) { -            const auto topological_result = surface->MatchesTopology(params); -            if (topological_result != MatchTopologyResult::FullMatch) { -                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, -                                      topological_result); -            } -        } +template <class P> +void TextureCache<P>::PopAsyncFlushes() { +    if (committed_downloads.empty()) { +        return; +    } +    const std::span<const ImageId> download_ids = committed_downloads.front(); +    if (download_ids.empty()) { +        committed_downloads.pop(); +        return; +    } +    size_t total_size_bytes = 0; +    for (const ImageId image_id : download_ids) { +        total_size_bytes += slot_images[image_id].unswizzled_size_bytes; +    } +    auto download_map = runtime.MapDownloadBuffer(total_size_bytes); +    size_t buffer_offset = 0; +    for (const ImageId image_id : download_ids) { +        Image& image = slot_images[image_id]; +        const auto copies = FullDownloadCopies(image.info); +        image.DownloadMemory(download_map, buffer_offset, copies); +        buffer_offset += image.unswizzled_size_bytes; +    } +    // Wait for downloads to finish +    runtime.Finish(); + +    buffer_offset = 0; +    const std::span<u8> download_span = download_map.Span(); + 
   for (const ImageId image_id : download_ids) { +        const ImageBase& image = slot_images[image_id]; +        const auto copies = FullDownloadCopies(image.info); +        const std::span<u8> image_download_span = download_span.subspan(buffer_offset); +        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); +        buffer_offset += image.unswizzled_size_bytes; +    } +    committed_downloads.pop(); +} -        // Manage 3D textures -        if (params.block_depth > 0) { -            auto surface = -                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); -            if (surface) { -                return *surface; -            } +template <class P> +bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { +    bool is_modified = false; +    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { +        if (False(image.flags & ImageFlagBits::GpuModified)) { +            return false;          } +        is_modified = true; +        return true; +    }); +    return is_modified; +} -        // Split cases between 1 overlap or many. -        if (overlaps.size() == 1) { -            TSurface current_surface = overlaps[0]; -            // First check if the surface is within the overlap. If not, it means -            // two things either the candidate surface is a supertexture of the overlap -            // or they don't match in any known way. 
-            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { -                const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); -                if (view) { -                    return *view; -                } -                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, -                                      MatchTopologyResult::FullMatch); -            } -            // Now we check if the candidate is a mipmap/layer of the overlap -            std::optional<TView> view = -                current_surface->EmplaceView(params, gpu_addr, candidate_size); -            if (view) { -                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); -                if (is_mirage) { -                    // On a mirage view, we need to recreate the surface under this new view -                    // and then obtain a view again. -                    SurfaceParams new_params = current_surface->GetSurfaceParams(); -                    const u32 wh = SurfaceParams::ConvertWidth( -                        new_params.width, new_params.pixel_format, params.pixel_format); -                    const u32 hh = SurfaceParams::ConvertHeight( -                        new_params.height, new_params.pixel_format, params.pixel_format); -                    new_params.width = wh; -                    new_params.height = hh; -                    new_params.pixel_format = params.pixel_format; -                    std::pair<TSurface, TView> pair = -                        RebuildSurface(current_surface, new_params, is_render); -                    std::optional<TView> mirage_view = -                        pair.first->EmplaceView(params, gpu_addr, candidate_size); -                    if (mirage_view) -                        return {pair.first, *mirage_view}; -                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, -                                          
MatchTopologyResult::FullMatch); -                } -                return {current_surface, *view}; -            } -        } else { -            // If there are many overlaps, odds are they are subtextures of the candidate -            // surface. We try to construct a new surface based on the candidate parameters, -            // using the overlaps. If a single overlap fails, this will fail. -            std::optional<std::pair<TSurface, TView>> view = -                TryReconstructSurface(overlaps, params, gpu_addr); -            if (view) { -                return *view; -            } -        } -        // We failed all the tests, recycle the overlaps into a new texture. -        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, -                              MatchTopologyResult::FullMatch); -    } - -    /** -     * Gets the starting address and parameters of a candidate surface and tries to find a -     * matching surface within the cache that's similar to it. If there are many textures -     * or the texture found if entirely incompatible, it will fail. If no texture is found, the -     * blit will be unsuccessful. -     * -     * @param gpu_addr The starting address of the candidate surface. -     * @param params   The parameters on the candidate surface. 
-     **/ -    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { -        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - -        if (!cpu_addr) { -            Deduction result{}; -            result.type = DeductionType::DeductionFailed; -            return result; -        } +template <class P> +void TextureCache<P>::RefreshContents(Image& image) { +    if (False(image.flags & ImageFlagBits::CpuModified)) { +        // Only upload modified images +        return; +    } +    image.flags &= ~ImageFlagBits::CpuModified; +    TrackImage(image); -        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { -            TSurface& current_surface = iter->second; -            const auto topological_result = current_surface->MatchesTopology(params); -            if (topological_result != MatchTopologyResult::FullMatch) { -                Deduction result{}; -                result.type = DeductionType::DeductionFailed; -                return result; -            } -            const auto struct_result = current_surface->MatchesStructure(params); -            if (struct_result != MatchStructureResult::None && -                current_surface->MatchTarget(params.target)) { -                Deduction result{}; -                result.type = DeductionType::DeductionComplete; -                result.surface = current_surface; -                return result; -            } -        } +    if (image.info.num_samples > 1) { +        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); +        return; +    } +    auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); +    UploadImageContents(image, map, 0); +    runtime.InsertUploadMemoryBarrier(); +} -        const std::size_t candidate_size = params.GetGuestSizeInBytes(); -        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; +template <class P> +template <typename MapBuffer> +void 
TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { +    const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); +    const GPUVAddr gpu_addr = image.gpu_addr; + +    if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { +        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); +        const auto uploads = FullUploadSwizzles(image.info); +        runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); +    } else if (True(image.flags & ImageFlagBits::Converted)) { +        std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); +        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); +        ConvertImage(unswizzled_data, image.info, mapped_span, copies); +        image.UploadMemory(map, buffer_offset, copies); +    } else if (image.info.type == ImageType::Buffer) { +        const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; +        image.UploadMemory(map, buffer_offset, copies); +    } else { +        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); +        image.UploadMemory(map, buffer_offset, copies); +    } +} -        if (overlaps.empty()) { -            Deduction result{}; -            result.type = DeductionType::DeductionIncomplete; -            return result; -        } +template <class P> +ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { +    if (!IsValidAddress(gpu_memory, config)) { +        return NULL_IMAGE_VIEW_ID; +    } +    const auto [pair, is_new] = image_views.try_emplace(config); +    ImageViewId& image_view_id = pair->second; +    if (is_new) { +        image_view_id = CreateImageView(config); +    } +    return image_view_id; +} -        if (overlaps.size() > 1) { -            Deduction result{}; -            result.type = DeductionType::DeductionFailed; -            return result; -        } else { -           
 Deduction result{}; -            result.type = DeductionType::DeductionComplete; -            result.surface = overlaps[0]; -            return result; -        } +template <class P> +ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { +    const ImageInfo info(config); +    const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; +    const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); +    if (!image_id) { +        return NULL_IMAGE_VIEW_ID;      } +    ImageBase& image = slot_images[image_id]; +    const SubresourceBase base = image.TryFindBase(config.Address()).value(); +    ASSERT(base.level == 0); +    const ImageViewInfo view_info(config, base.layer); +    const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); +    ImageViewBase& image_view = slot_image_views[image_view_id]; +    image_view.flags |= ImageViewFlagBits::Strong; +    image.flags |= ImageFlagBits::Strong; +    return image_view_id; +} -    /** -     * Gets a null surface based on a target texture. -     * @param target The target of the null surface. 
-     */ -    TView GetNullSurface(SurfaceTarget target) { -        const u32 i_target = static_cast<u32>(target); -        if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { -            return it->second->GetMainView(); -        } -        SurfaceParams params{}; -        params.target = target; -        params.is_tiled = false; -        params.srgb_conversion = false; -        params.is_layered = -            target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || -            target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; -        params.block_width = 0; -        params.block_height = 0; -        params.block_depth = 0; -        params.tile_width_spacing = 1; -        params.width = 1; -        params.height = 1; -        params.depth = 1; -        if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { -            params.depth = 6; -        } -        params.pitch = 4; -        params.num_levels = 1; -        params.emulated_levels = 1; -        params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; -        params.type = VideoCore::Surface::SurfaceType::ColorTexture; -        auto surface = CreateSurface(0ULL, params); -        invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); -        surface->UploadTexture(invalid_memory); -        surface->MarkAsModified(false, Tick()); -        invalid_cache.emplace(i_target, surface); -        return surface->GetMainView(); -    } - -    /** -     * Gets the a source and destination starting address and parameters, -     * and tries to deduce if they are supposed to be depth textures. If so, their -     * parameters are modified and fixed into so. -     * -     * @param src_params   The parameters of the candidate surface. -     * @param dst_params   The parameters of the destination surface. -     * @param src_gpu_addr The starting address of the candidate surface. 
-     * @param dst_gpu_addr The starting address of the destination surface. -     **/ -    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, -                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { -        auto deduced_src = DeduceSurface(src_gpu_addr, src_params); -        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); -        if (deduced_src.Failed() || deduced_dst.Failed()) { -            return; +template <class P> +ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, +                                           RelaxedOptions options) { +    if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { +        return image_id; +    } +    return InsertImage(info, gpu_addr, options); +} + +template <class P> +ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, +                                   RelaxedOptions options) { +    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); +    if (!cpu_addr) { +        return ImageId{}; +    } +    ImageId image_id; +    const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { +        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { +            const bool strict_size = False(options & RelaxedOptions::Size) && +                                     True(existing_image.flags & ImageFlagBits::Strong); +            const ImageInfo& existing = existing_image.info; +            if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && +                existing.pitch == info.pitch && +                IsPitchLinearSameSize(existing, info, strict_size) && +                IsViewCompatible(existing.format, info.format)) { +                image_id = existing_image_id; +                return true; +            } +        } else if (IsSubresource(info, existing_image, gpu_addr, options)) { +       
     image_id = existing_image_id; +            return true;          } +        return false; +    }; +    ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); +    return image_id; +} -        const bool incomplete_src = deduced_src.Incomplete(); -        const bool incomplete_dst = deduced_dst.Incomplete(); +template <class P> +ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, +                                     RelaxedOptions options) { +    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); +    ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); +    const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); +    const Image& image = slot_images[image_id]; +    // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different +    const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); +    if (is_new) { +        it->second = slot_image_allocs.insert(); +    } +    slot_image_allocs[it->second].images.push_back(image_id); +    return image_id; +} -        if (incomplete_src && incomplete_dst) { +template <class P> +ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { +    ImageInfo new_info = info; +    const size_t size_bytes = CalculateGuestSizeInBytes(new_info); +    std::vector<ImageId> overlap_ids; +    std::vector<ImageId> left_aliased_ids; +    std::vector<ImageId> right_aliased_ids; +    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { +        if (info.type != overlap.info.type) {              return;          } - -        const bool any_incomplete = incomplete_src || incomplete_dst; - -        if (!any_incomplete) { -            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { -                return; -            } -        } else { -            if (incomplete_src && !(deduced_dst.IsDepth())) { -    
            return; -            } - -            if (incomplete_dst && !(deduced_src.IsDepth())) { -                return; +        if (info.type == ImageType::Linear) { +            if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { +                // Alias linear images with the same pitch +                left_aliased_ids.push_back(overlap_id);              } +            return; +        } +        const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); +        if (solution) { +            gpu_addr = solution->gpu_addr; +            cpu_addr = solution->cpu_addr; +            new_info.resources = solution->resources; +            overlap_ids.push_back(overlap_id); +            return; +        } +        static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; +        const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); +        if (IsSubresource(new_info, overlap, gpu_addr, options)) { +            left_aliased_ids.push_back(overlap_id); +        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { +            right_aliased_ids.push_back(overlap_id);          } +    }); +    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); +    Image& new_image = slot_images[new_image_id]; -        const auto inherit_format = [](SurfaceParams& to, TSurface from) { -            const SurfaceParams& params = from->GetSurfaceParams(); -            to.pixel_format = params.pixel_format; -            to.type = params.type; -        }; -        // Now we got the cases where one or both is Depth and the other is not known -        if (!incomplete_src) { -            inherit_format(src_params, deduced_src.surface); +    // TODO: Only upload what we need +    RefreshContents(new_image); + +    for (const ImageId overlap_id : overlap_ids) { +        Image& overlap = slot_images[overlap_id]; +        if (overlap.info.num_samples != 
new_image.info.num_samples) { +            LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");          } else { -            inherit_format(src_params, deduced_dst.surface); +            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); +            const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); +            runtime.CopyImage(new_image, overlap, copies);          } -        if (!incomplete_dst) { -            inherit_format(dst_params, deduced_dst.surface); -        } else { -            inherit_format(dst_params, deduced_src.surface); +        if (True(overlap.flags & ImageFlagBits::Tracked)) { +            UntrackImage(overlap);          } +        UnregisterImage(overlap_id); +        DeleteImage(overlap_id); +    } +    ImageBase& new_image_base = new_image; +    for (const ImageId aliased_id : right_aliased_ids) { +        ImageBase& aliased = slot_images[aliased_id]; +        AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); +    } +    for (const ImageId aliased_id : left_aliased_ids) { +        ImageBase& aliased = slot_images[aliased_id]; +        AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);      } +    RegisterImage(new_image_id); +    return new_image_id; +} -    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, -                                                 bool preserve_contents) { -        auto new_surface{GetUncachedSurface(gpu_addr, params)}; -        Register(new_surface); -        if (preserve_contents) { -            LoadSurface(new_surface); -        } -        return {new_surface, new_surface->GetMainView()}; +template <class P> +typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( +    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { +    static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | 
RelaxedOptions::Samples; +    const GPUVAddr dst_addr = dst.Address(); +    const GPUVAddr src_addr = src.Address(); +    ImageInfo dst_info(dst); +    ImageInfo src_info(src); +    ImageId dst_id; +    ImageId src_id; +    do { +        has_deleted_images = false; +        dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); +        src_id = FindImage(src_info, src_addr, FIND_OPTIONS); +        const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; +        const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; +        DeduceBlitImages(dst_info, src_info, dst_image, src_image); +        if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { +            continue; +        } +        if (!dst_id) { +            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); +        } +        if (!src_id) { +            src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); +        } +    } while (has_deleted_images); +    return BlitImages{ +        .dst_id = dst_id, +        .src_id = src_id, +        .dst_format = dst_info.format, +        .src_format = src_info.format, +    }; +} + +template <class P> +SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { +    if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { +        return NULL_SAMPLER_ID; +    } +    const auto [pair, is_new] = samplers.try_emplace(config); +    if (is_new) { +        pair->second = slot_samplers.insert(runtime, config);      } +    return pair->second; +} -    void LoadSurface(const TSurface& surface) { -        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); -        surface->LoadBuffer(gpu_memory, staging_cache); -        surface->UploadTexture(staging_cache.GetBuffer(0)); -        surface->MarkAsModified(false, Tick()); +template <class P> +ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { +    const auto& regs = maxwell3d.regs; +    if (index 
>= regs.rt_control.count) { +        return ImageViewId{}; +    } +    const auto& rt = regs.rt[index]; +    const GPUVAddr gpu_addr = rt.Address(); +    if (gpu_addr == 0) { +        return ImageViewId{}; +    } +    if (rt.format == Tegra::RenderTargetFormat::NONE) { +        return ImageViewId{};      } +    const ImageInfo info(regs, index); +    return FindRenderTargetView(info, gpu_addr, is_clear); +} -    void FlushSurface(const TSurface& surface) { -        if (!surface->IsModified()) { -            return; -        } -        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); -        surface->DownloadTexture(staging_cache.GetBuffer(0)); -        surface->FlushBuffer(gpu_memory, staging_cache); -        surface->MarkAsModified(false, Tick()); -    } - -    void RegisterInnerCache(TSurface& surface) { -        const VAddr cpu_addr = surface->GetCpuAddr(); -        VAddr start = cpu_addr >> registry_page_bits; -        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; -        l1_cache[cpu_addr] = surface; -        while (start <= end) { -            registry[start].push_back(surface); -            start++; -        } +template <class P> +ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { +    const auto& regs = maxwell3d.regs; +    if (!regs.zeta_enable) { +        return ImageViewId{}; +    } +    const GPUVAddr gpu_addr = regs.zeta.Address(); +    if (gpu_addr == 0) { +        return ImageViewId{};      } +    const ImageInfo info(regs); +    return FindRenderTargetView(info, gpu_addr, is_clear); +} -    void UnregisterInnerCache(TSurface& surface) { -        const VAddr cpu_addr = surface->GetCpuAddr(); -        VAddr start = cpu_addr >> registry_page_bits; -        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; -        l1_cache.erase(cpu_addr); -        while (start <= end) { -            auto& reg{registry[start]}; -            reg.erase(std::find(reg.begin(), reg.end(), 
surface)); -            start++; -        } +template <class P> +ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, +                                                  bool is_clear) { +    const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; +    const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); +    if (!image_id) { +        return NULL_IMAGE_VIEW_ID; +    } +    Image& image = slot_images[image_id]; +    const ImageViewType view_type = RenderTargetImageViewType(info); +    SubresourceBase base; +    if (image.info.type == ImageType::Linear) { +        base = SubresourceBase{.level = 0, .layer = 0}; +    } else { +        base = image.TryFindBase(gpu_addr).value();      } +    const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; +    const SubresourceRange range{ +        .base = base, +        .extent = {.levels = 1, .layers = layers}, +    }; +    return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} -    VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { -        if (size == 0) { -            return {}; +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { +    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; +    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; +    boost::container::small_vector<ImageId, 32> images; +    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { +        const auto it = page_table.find(page); +        if (it == page_table.end()) { +            if constexpr (BOOL_BREAK) { +                return false; +            } else { +                return; +            }          } -        const VAddr cpu_addr_end = cpu_addr + size; -        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 
-        VectorSurface surfaces; -        for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { -            const auto it = registry.find(start); -            if (it == registry.end()) { +        for (const ImageId image_id : it->second) { +            Image& image = slot_images[image_id]; +            if (True(image.flags & ImageFlagBits::Picked)) {                  continue;              } -            for (auto& surface : it->second) { -                if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { -                    continue; +            if (!image.Overlaps(cpu_addr, size)) { +                continue; +            } +            image.flags |= ImageFlagBits::Picked; +            images.push_back(image_id); +            if constexpr (BOOL_BREAK) { +                if (func(image_id, image)) { +                    return true;                  } -                surface->MarkAsPicked(true); -                surfaces.push_back(surface); +            } else { +                func(image_id, image);              }          } -        for (auto& surface : surfaces) { -            surface->MarkAsPicked(false); +        if constexpr (BOOL_BREAK) { +            return false;          } -        return surfaces; +    }); +    for (const ImageId image_id : images) { +        slot_images[image_id].flags &= ~ImageFlagBits::Picked;      } +} -    void ReserveSurface(const SurfaceParams& params, TSurface surface) { -        surface_reserve[params].push_back(std::move(surface)); +template <class P> +ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { +    Image& image = slot_images[image_id]; +    if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { +        return image_view_id;      } +    const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); +    image.InsertView(info, image_view_id); +    return image_view_id; +} + 
+template <class P> +void TextureCache<P>::RegisterImage(ImageId image_id) { +    ImageBase& image = slot_images[image_id]; +    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), +               "Trying to register an already registered image"); +    image.flags |= ImageFlagBits::Registered; +    ForEachPage(image.cpu_addr, image.guest_size_bytes, +                [this, image_id](u64 page) { page_table[page].push_back(image_id); }); +} -    TSurface TryGetReservedSurface(const SurfaceParams& params) { -        auto search{surface_reserve.find(params)}; -        if (search == surface_reserve.end()) { -            return {}; +template <class P> +void TextureCache<P>::UnregisterImage(ImageId image_id) { +    Image& image = slot_images[image_id]; +    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), +               "Trying to unregister an already registered image"); +    image.flags &= ~ImageFlagBits::Registered; +    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { +        const auto page_it = page_table.find(page); +        if (page_it == page_table.end()) { +            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); +            return;          } -        for (auto& surface : search->second) { -            if (!surface->IsRegistered()) { -                return surface; -            } +        std::vector<ImageId>& image_ids = page_it->second; +        const auto vector_it = std::ranges::find(image_ids, image_id); +        if (vector_it == image_ids.end()) { +            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT); +            return;          } -        return {}; -    } +        image_ids.erase(vector_it); +    }); +} -    /// Try to do an image copy logging when formats are incompatible. 
-    void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { -        const SurfaceParams& src_params = src->GetSurfaceParams(); -        const SurfaceParams& dst_params = dst->GetSurfaceParams(); -        if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { -            LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, -                      src_params.pixel_format); -            return; +template <class P> +void TextureCache<P>::TrackImage(ImageBase& image) { +    ASSERT(False(image.flags & ImageFlagBits::Tracked)); +    image.flags |= ImageFlagBits::Tracked; +    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); +} + +template <class P> +void TextureCache<P>::UntrackImage(ImageBase& image) { +    ASSERT(True(image.flags & ImageFlagBits::Tracked)); +    image.flags &= ~ImageFlagBits::Tracked; +    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); +} + +template <class P> +void TextureCache<P>::DeleteImage(ImageId image_id) { +    ImageBase& image = slot_images[image_id]; +    const GPUVAddr gpu_addr = image.gpu_addr; +    const auto alloc_it = image_allocs_table.find(gpu_addr); +    if (alloc_it == image_allocs_table.end()) { +        UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", +                        gpu_addr); +        return; +    } +    const ImageAllocId alloc_id = alloc_it->second; +    std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; +    const auto alloc_image_it = std::ranges::find(alloc_images, image_id); +    if (alloc_image_it == alloc_images.end()) { +        UNREACHABLE_MSG("Trying to delete an image that does not exist"); +        return; +    } +    ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); +    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + +    // 
Mark render targets as dirty +    auto& dirty = maxwell3d.dirty.flags; +    dirty[Dirty::RenderTargets] = true; +    dirty[Dirty::ZetaBuffer] = true; +    for (size_t rt = 0; rt < NUM_RT; ++rt) { +        dirty[Dirty::ColorBuffer0 + rt] = true; +    } +    const std::span<const ImageViewId> image_view_ids = image.image_view_ids; +    for (const ImageViewId image_view_id : image_view_ids) { +        std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); +        if (render_targets.depth_buffer_id == image_view_id) { +            render_targets.depth_buffer_id = ImageViewId{};          } -        ImageCopy(src, dst, copy);      } +    RemoveImageViewReferences(image_view_ids); +    RemoveFramebuffers(image_view_ids); + +    for (const AliasedImage& alias : image.aliased_images) { +        ImageBase& other_image = slot_images[alias.id]; +        [[maybe_unused]] const size_t num_removed_aliases = +            std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { +                return other_alias.id == image_id; +            }); +        ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", +                   num_removed_aliases); +    } +    for (const ImageViewId image_view_id : image_view_ids) { +        sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); +        slot_image_views.erase(image_view_id); +    } +    sentenced_images.Push(std::move(slot_images[image_id])); +    slot_images.erase(image_id); -    constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { -        return siblings_table[static_cast<std::size_t>(format)]; +    alloc_images.erase(alloc_image_it); +    if (alloc_images.empty()) { +        image_allocs_table.erase(alloc_it);      } +    if constexpr (ENABLE_VALIDATION) { +        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); +        std::ranges::fill(compute_image_view_ids, CORRUPT_ID); +    } +    
graphics_image_table.Invalidate(); +    compute_image_table.Invalidate(); +    has_deleted_images = true; +} -    /// Returns true the shader sampler entry is compatible with the TIC texture type. -    static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, -                                 const VideoCommon::Shader::Sampler& entry) { -        const auto shader_type = entry.type; -        switch (tic_type) { -        case Tegra::Texture::TextureType::Texture1D: -        case Tegra::Texture::TextureType::Texture1DArray: -            return shader_type == Tegra::Shader::TextureType::Texture1D; -        case Tegra::Texture::TextureType::Texture1DBuffer: -            // TODO(Rodrigo): Assume as valid for now -            return true; -        case Tegra::Texture::TextureType::Texture2D: -        case Tegra::Texture::TextureType::Texture2DNoMipmap: -            return shader_type == Tegra::Shader::TextureType::Texture2D; -        case Tegra::Texture::TextureType::Texture2DArray: -            return shader_type == Tegra::Shader::TextureType::Texture2D || -                   shader_type == Tegra::Shader::TextureType::TextureCube; -        case Tegra::Texture::TextureType::Texture3D: -            return shader_type == Tegra::Shader::TextureType::Texture3D; -        case Tegra::Texture::TextureType::TextureCubeArray: -        case Tegra::Texture::TextureType::TextureCubemap: -            if (shader_type == Tegra::Shader::TextureType::TextureCube) { -                return true; -            } -            return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; +template <class P> +void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { +    auto it = image_views.begin(); +    while (it != image_views.end()) { +        const auto found = std::ranges::find(removed_views, it->second); +        if (found != removed_views.end()) { +            it = image_views.erase(it); +        } else { +            ++it;  
        } -        UNREACHABLE(); -        return true;      } +} -    struct FramebufferTargetInfo { -        TSurface target; -        TView view; -    }; - -    void AsyncFlushSurface(TSurface& surface) { -        if (!uncommitted_flushes) { -            uncommitted_flushes = std::make_shared<std::list<TSurface>>(); +template <class P> +void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { +    auto it = framebuffers.begin(); +    while (it != framebuffers.end()) { +        if (it->first.Contains(removed_views)) { +            it = framebuffers.erase(it); +        } else { +            ++it;          } -        uncommitted_flushes->push_back(surface);      } +} -    VideoCore::RasterizerInterface& rasterizer; -    Tegra::Engines::Maxwell3D& maxwell3d; -    Tegra::MemoryManager& gpu_memory; - -    FormatLookupTable format_lookup_table; -    FormatCompatibility format_compatibility; - -    u64 ticks{}; - -    // Guards the cache for protection conflicts. -    bool guard_render_targets{}; -    bool guard_samplers{}; - -    // The siblings table is for formats that can inter exchange with one another -    // without causing issues. This is only valid when a conflict occurs on a non -    // rendering use. -    std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table; - -    // The internal Cache is different for the Texture Cache. It's based on buckets -    // of 1MB. This fits better for the purpose of this cache as textures are normaly -    // large in size. 
-    static constexpr u64 registry_page_bits{20}; -    static constexpr u64 registry_page_size{1 << registry_page_bits}; -    std::unordered_map<VAddr, std::vector<TSurface>> registry; +template <class P> +void TextureCache<P>::MarkModification(ImageBase& image) noexcept { +    image.flags |= ImageFlagBits::GpuModified; +    image.modification_tick = ++modification_tick; +} -    static constexpr u32 DEPTH_RT = 8; -    static constexpr u32 NO_RT = 0xFFFFFFFF; +template <class P> +void TextureCache<P>::SynchronizeAliases(ImageId image_id) { +    boost::container::small_vector<const AliasedImage*, 1> aliased_images; +    ImageBase& image = slot_images[image_id]; +    u64 most_recent_tick = image.modification_tick; +    for (const AliasedImage& aliased : image.aliased_images) { +        ImageBase& aliased_image = slot_images[aliased.id]; +        if (image.modification_tick < aliased_image.modification_tick) { +            most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); +            aliased_images.push_back(&aliased); +        } +    } +    if (aliased_images.empty()) { +        return; +    } +    image.modification_tick = most_recent_tick; +    std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { +        const ImageBase& lhs_image = slot_images[lhs->id]; +        const ImageBase& rhs_image = slot_images[rhs->id]; +        return lhs_image.modification_tick < rhs_image.modification_tick; +    }); +    for (const AliasedImage* const aliased : aliased_images) { +        CopyImage(image_id, aliased->id, aliased->copies); +    } +} -    // The L1 Cache is used for fast texture lookup before checking the overlaps -    // This avoids calculating size and other stuffs. 
-    std::unordered_map<VAddr, TSurface> l1_cache; +template <class P> +void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { +    Image& image = slot_images[image_id]; +    if (invalidate) { +        image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); +        if (False(image.flags & ImageFlagBits::Tracked)) { +            TrackImage(image); +        } +    } else { +        RefreshContents(image); +        SynchronizeAliases(image_id); +    } +    if (is_modification) { +        MarkModification(image); +    } +    image.frame_tick = frame_tick; +} -    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have -    /// previously been used. This is to prevent surfaces from being constantly created and -    /// destroyed when used with different surface parameters. -    std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; -    std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> -        render_targets; -    FramebufferTargetInfo depth_buffer; +template <class P> +void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, +                                       bool invalidate) { +    if (!image_view_id) { +        return; +    } +    const ImageViewBase& image_view = slot_image_views[image_view_id]; +    PrepareImage(image_view.image_id, is_modification, invalidate); +} -    std::vector<TSurface> sampled_textures; +template <class P> +void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { +    Image& dst = slot_images[dst_id]; +    Image& src = slot_images[src_id]; +    const auto dst_format_type = GetFormatType(dst.info.format); +    const auto src_format_type = GetFormatType(src.info.format); +    if (src_format_type == dst_format_type) { +        if constexpr (HAS_EMULATED_COPIES) { +            if (!runtime.CanImageBeCopied(dst, src)) { + 
               return runtime.EmulateCopyImage(dst, src, copies); +            } +        } +        return runtime.CopyImage(dst, src, copies); +    } +    UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); +    UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); +    for (const ImageCopy& copy : copies) { +        UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); +        UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); +        UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); +        UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + +        const SubresourceBase dst_base{ +            .level = copy.dst_subresource.base_level, +            .layer = copy.dst_subresource.base_layer, +        }; +        const SubresourceBase src_base{ +            .level = copy.src_subresource.base_level, +            .layer = copy.src_subresource.base_layer, +        }; +        const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; +        const SubresourceExtent src_extent{.levels = 1, .layers = 1}; +        const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; +        const SubresourceRange src_range{.base = src_base, .extent = src_extent}; +        const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); +        const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); +        const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); +        Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; +        const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); +        ImageView& dst_view = slot_image_views[dst_view_id]; +        ImageView& src_view = slot_image_views[src_view_id]; +        [[maybe_unused]] const Extent3D expected_size{ +            .width = std::min(dst_view.size.width, src_view.size.width), +            .height = std::min(dst_view.size.height, src_view.size.height), +            
.depth = std::min(dst_view.size.depth, src_view.size.depth), +        }; +        UNIMPLEMENTED_IF(copy.extent != expected_size); -    /// This cache stores null surfaces in order to be used as a placeholder -    /// for invalid texture calls. -    std::unordered_map<u32, TSurface> invalid_cache; -    std::vector<u8> invalid_memory; +        runtime.ConvertImage(dst_framebuffer, dst_view, src_view); +    } +} -    std::list<TSurface> marked_for_unregister; +template <class P> +void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { +    if (*old_id == new_id) { +        return; +    } +    if (*old_id) { +        const ImageViewBase& old_view = slot_image_views[*old_id]; +        if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { +            uncommitted_downloads.push_back(old_view.image_id); +        } +    } +    *old_id = new_id; +} -    std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; -    std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; +template <class P> +std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( +    ImageId image_id, const ImageViewInfo& view_info) { +    const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); +    const ImageBase& image = slot_images[image_id]; +    const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; +    const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; +    const ImageViewId depth_view_id = is_color ? 
ImageViewId{} : view_id; +    const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); +    const u32 num_samples = image.info.num_samples; +    const auto [samples_x, samples_y] = SamplesLog2(num_samples); +    const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ +        .color_buffer_ids = {color_view_id}, +        .depth_buffer_id = depth_view_id, +        .size = {extent.width >> samples_x, extent.height >> samples_y}, +    }); +    return {framebuffer_id, view_id}; +} -    StagingCache staging_cache; -    std::recursive_mutex mutex; -}; +template <class P> +bool TextureCache<P>::IsFullClear(ImageViewId id) { +    if (!id) { +        return true; +    } +    const ImageViewBase& image_view = slot_image_views[id]; +    const ImageBase& image = slot_images[image_view.image_id]; +    const Extent3D size = image_view.size; +    const auto& regs = maxwell3d.regs; +    const auto& scissor = regs.scissor_test[0]; +    if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { +        // Images with multiple resources can't be cleared in a single call +        return false; +    } +    if (regs.clear_flags.scissor == 0) { +        // If scissor testing is disabled, the clear is always full +        return true; +    } +    // Make sure the clear covers all texels in the subresource +    return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && +           scissor.max_y >= size.height; +}  } // namespace VideoCommon diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h @@ -0,0 +1,140 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/slot_vector.h" + +namespace VideoCommon { + +constexpr size_t NUM_RT = 8; +constexpr size_t MAX_MIP_LEVELS = 14; + +constexpr SlotId CORRUPT_ID{0xfffffffe}; + +using ImageId = SlotId; +using ImageViewId = SlotId; +using ImageAllocId = SlotId; +using SamplerId = SlotId; +using FramebufferId = SlotId; + +enum class ImageType : u32 { +    e1D, +    e2D, +    e3D, +    Linear, +    Buffer, +}; + +enum class ImageViewType : u32 { +    e1D, +    e2D, +    Cube, +    e3D, +    e1DArray, +    e2DArray, +    CubeArray, +    Rect, +    Buffer, +}; +constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; + +enum class RelaxedOptions : u32 { +    Size = 1 << 0, +    Format = 1 << 1, +    Samples = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) + +struct Offset2D { +    constexpr auto operator<=>(const Offset2D&) const noexcept = default; + +    s32 x; +    s32 y; +}; + +struct Offset3D { +    constexpr auto operator<=>(const Offset3D&) const noexcept = default; + +    s32 x; +    s32 y; +    s32 z; +}; + +struct Extent2D { +    constexpr auto operator<=>(const Extent2D&) const noexcept = default; + +    u32 width; +    u32 height; +}; + +struct Extent3D { +    constexpr auto operator<=>(const Extent3D&) const noexcept = default; + +    u32 width; +    u32 height; +    u32 depth; +}; + +struct SubresourceLayers { +    s32 base_level = 0; +    s32 base_layer = 0; +    s32 num_layers = 1; +}; + +struct SubresourceBase { +    constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; + +    s32 level = 0; +    s32 layer = 0; +}; + +struct SubresourceExtent { +    constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; + +    s32 levels = 1; +    s32 layers = 1; +}; + +struct SubresourceRange { +    constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; + +    SubresourceBase base; +    
SubresourceExtent extent; +}; + +struct ImageCopy { +    SubresourceLayers src_subresource; +    SubresourceLayers dst_subresource; +    Offset3D src_offset; +    Offset3D dst_offset; +    Extent3D extent; +}; + +struct BufferImageCopy { +    size_t buffer_offset; +    size_t buffer_size; +    u32 buffer_row_length; +    u32 buffer_image_height; +    SubresourceLayers image_subresource; +    Offset3D image_offset; +    Extent3D image_extent; +}; + +struct BufferCopy { +    size_t src_offset; +    size_t dst_offset; +    size_t size; +}; + +struct SwizzleParameters { +    Extent3D num_tiles; +    Extent3D block; +    size_t buffer_offset; +    s32 level; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..9ed1fc007 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp @@ -0,0 +1,1232 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This files contains code from Ryujinx +// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx +// The sections using code from Ryujinx are marked with a link to the original version + +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include <algorithm> +#include <array> +#include <numeric> +#include <optional> +#include <span> +#include <vector> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/compatible_formats.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/astc.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon { + +namespace { + +using Tegra::Texture::GOB_SIZE; +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using Tegra::Texture::GOB_SIZE_Z; +using Tegra::Texture::GOB_SIZE_Z_SHIFT; +using Tegra::Texture::MsaaMode; +using Tegra::Texture::SwizzleTexture; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::UnswizzleTexture; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; +using 
VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsViewCompatible; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; + +constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); + +struct LevelInfo { +    Extent3D size; +    Extent3D block; +    Extent2D tile_size; +    u32 bpp_log2; +    u32 tile_width_spacing; +}; + +[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { +    if (shift == 0) { +        return 0; +    } +    u32 x = unit_factor << (shift - 1); +    if (x >= dimension) { +        while (--shift) { +            x >>= 1; +            if (x < dimension) { +                break; +            } +        } +    } +    return shift; +} + +[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { +    return std::max<u32>(size >> level, 1); +} + +[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { +    return Extent3D{ +        .width = AdjustMipSize(size.width, level), +        .height = AdjustMipSize(size.height, level), +        .depth = AdjustMipSize(size.depth, level), +    }; +} + +[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { +    const auto [samples_x, samples_y] = SamplesLog2(num_samples); +    return Extent3D{ +        .width = size.width >> samples_x, +        .height = size.height >> samples_y, +        .depth = size.depth, +    }; +} + +template <u32 GOB_EXTENT> +[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { +    do { +        while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { +            --block_size; +        } +    } while (level--); +    return block_size; +} + +[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, +                             
                       u32 level) { +    return { +        .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), +        .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), +        .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), +    }; +} + +[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { +    return { +        .width = Common::DivCeil(size.width, tile_size.width), +        .height = Common::DivCeil(size.height, tile_size.height), +        .depth = size.depth, +    }; +} + +[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) { +    return std::countl_zero(bytes_per_block) ^ 0x1F; +} + +[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { +    return BytesPerBlockLog2(BytesPerBlock(format)); +} + +[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { +    const Extent3D num_blocks = AdjustTileSize(size, tile_size); +    return num_blocks.width * num_blocks.height * num_blocks.depth; +} + +[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { +    return Common::DivCeil(AdjustMipSize(size, level), block_size); +} + +[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { +    return config.Width() * config.Height() * BytesPerBlock(format); +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { +    switch (type) { +    case TextureType::Texture2D: +    case TextureType::Texture2DArray: +    case TextureType::Texture2DNoMipmap: +    case TextureType::Texture3D: +    case TextureType::TextureCubeArray: +    case TextureType::TextureCubemap: +        return true; +    case TextureType::Texture1D: +    case TextureType::Texture1DArray: +    case TextureType::Texture1DBuffer: +        return false; +    } +    return false; +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { +    switch (type) { + 
   case ImageType::e2D: +    case ImageType::e3D: +    case ImageType::Linear: +        return true; +    case ImageType::e1D: +    case ImageType::Buffer: +        return false; +    } +    UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type)); +} + +[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { +    switch (num_samples) { +    case 1: +        return {1, 1}; +    case 2: +        return {2, 1}; +    case 4: +        return {2, 2}; +    case 8: +        return {4, 2}; +    case 16: +        return {4, 4}; +    } +    UNREACHABLE_MSG("Invalid number of samples={}", num_samples); +    return {1, 1}; +} + +[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) { +    return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; +} + +[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) { +    return Extent3D{ +        .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, +        .height = AdjustSize(info.size.height, level, info.tile_size.height), +        .depth = AdjustMipSize(info.size.depth, level), +    }; +} + +[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { +    const Extent3D blocks = NumLevelBlocks(info, level); +    return Extent3D{ +        .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), +        .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), +        .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), +    }; +} + +[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { +    return Extent2D{ +        .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, +        .height = GOB_SIZE_Y_SHIFT + block_height, +    }; +} + +[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, +                                                  u32 block_depth) { +    return num_tiles.width <= (1U 
<< gob.width) || num_tiles.height <= (1U << gob.height) || +           num_tiles.depth < (1U << block_depth); +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, +                                            u32 bpp_log2) { +    if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { +        return GOB_SIZE_X_SHIFT - bpp_log2; +    } else { +        return gob.width; +    } +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, +                                            u32 tile_width_spacing) { +    const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); +    return StrideAlignment(num_tiles, block, gob, bpp_log2); +} + +[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { +    const Extent3D blocks = NumLevelBlocks(info, level); +    const Extent2D gobs{ +        .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), +        .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), +    }; +    const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing); +    const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); +    const u32 alignment = is_small ? 
0 : info.tile_width_spacing; +    return Extent2D{ +        .width = Common::AlignBits(gobs.width, alignment), +        .height = gobs.height, +    }; +} + +[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { +    const Extent3D blocks = NumLevelBlocks(info, level); +    const Extent3D tile_shift = TileShift(info, level); +    const Extent2D gobs = NumGobs(info, level); +    return Extent3D{ +        .width = Common::DivCeilLog2(gobs.width, tile_shift.width), +        .height = Common::DivCeilLog2(gobs.height, tile_shift.height), +        .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), +    }; +} + +[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { +    const Extent3D tile_shift = TileShift(info, level); +    const Extent3D tiles = LevelTiles(info, level); +    const u32 num_tiles = tiles.width * tiles.height * tiles.depth; +    const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; +    return num_tiles << shift; +} + +[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info, +                                                                            u32 num_levels) { +    ASSERT(num_levels <= MAX_MIP_LEVELS); +    std::array<u32, MAX_MIP_LEVELS> sizes{}; +    for (u32 level = 0; level < num_levels; ++level) { +        sizes[level] = CalculateLevelSize(info, level); +    } +    return sizes; +} + +[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, +                                                u32 num_samples, u32 tile_width_spacing) { +    const auto [samples_x, samples_y] = Samples(num_samples); +    const u32 bytes_per_block = BytesPerBlock(format); +    return { +        .size = +            { +                .width = size.width * samples_x, +                .height = size.height * samples_y, +                .depth = size.depth, +            }, +        
.block = block, +        .tile_size = DefaultBlockSize(format), +        .bpp_log2 = BytesPerBlockLog2(bytes_per_block), +        .tile_width_spacing = tile_width_spacing, +    }; +} + +[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { +    return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, +                         info.tile_width_spacing); +} + +[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, +                                                 u32 num_samples, u32 tile_width_spacing, +                                                 u32 level) { +    const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); +    u32 offset = 0; +    for (u32 current_level = 0; current_level < level; ++current_level) { +        offset += CalculateLevelSize(info, current_level); +    } +    return offset; +} + +[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, +                                           u32 tile_size_y, u32 tile_width_spacing) { +    // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 +    if (tile_width_spacing > 0) { +        const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; +        return Common::AlignBits(size_bytes, alignment_log2); +    } +    const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); +    while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { +        --block.height; +    } +    while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { +        --block.depth; +    } +    const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; +    const u32 num_blocks = size_bytes >> block_shift; +    if (size_bytes != num_blocks << block_shift) { +        return (num_blocks + 1) << block_shift; +    
} +    return size_bytes; +} + +[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info, +                                                                          const ImageBase& overlap, +                                                                          bool strict_size) { +    const ImageInfo& info = overlap.info; +    if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { +        return std::nullopt; +    } +    if (new_info.block != info.block) { +        return std::nullopt; +    } +    const SubresourceExtent resources = new_info.resources; +    return SubresourceExtent{ +        .levels = std::max(resources.levels, info.resources.levels), +        .layers = std::max(resources.layers, info.resources.layers), +    }; +} + +[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( +    const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { +    const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); +    const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); +    const auto it = std::ranges::find(slice_offsets, diff); +    if (it == slice_offsets.end()) { +        return std::nullopt; +    } +    const std::vector subresources = CalculateSliceSubresources(new_info); +    const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; +    const ImageInfo& info = overlap.info; +    if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { +        return std::nullopt; +    } +    const u32 mip_depth = std::max(1U, new_info.size.depth << base.level); +    if (mip_depth < info.size.depth + base.layer) { +        return std::nullopt; +    } +    if (MipBlockSize(new_info, base.level) != info.block) { +        return std::nullopt; +    } +    return SubresourceExtent{ +        .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), +        .layers 
= 1, +    }; +} + +[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( +    const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { +    const u32 layer_stride = new_info.layer_stride; +    const s32 new_size = layer_stride * new_info.resources.layers; +    const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); +    if (diff > new_size) { +        return std::nullopt; +    } +    const s32 base_layer = diff / layer_stride; +    const s32 mip_offset = diff % layer_stride; +    const std::array offsets = CalculateMipLevelOffsets(new_info); +    const auto end = offsets.begin() + new_info.resources.levels; +    const auto it = std::find(offsets.begin(), end, mip_offset); +    if (it == end) { +        // Mipmap is not aligned to any valid size +        return std::nullopt; +    } +    const SubresourceBase base{ +        .level = static_cast<s32>(std::distance(offsets.begin(), it)), +        .layer = base_layer, +    }; +    const ImageInfo& info = overlap.info; +    if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { +        return std::nullopt; +    } +    if (MipBlockSize(new_info, base.level) != info.block) { +        return std::nullopt; +    } +    return SubresourceExtent{ +        .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), +        .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), +    }; +} + +[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info, +                                                                      GPUVAddr gpu_addr, +                                                                      VAddr cpu_addr, +                                                                      const ImageBase& overlap, +                                                                      bool strict_size) { +    std::optional<SubresourceExtent> 
resources; +    if (new_info.type != ImageType::e3D) { +        resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); +    } else { +        resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); +    } +    if (!resources) { +        return std::nullopt; +    } +    return OverlapResult{ +        .gpu_addr = gpu_addr, +        .cpu_addr = cpu_addr, +        .resources = *resources, +    }; +} + +[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info, +                                                                     GPUVAddr gpu_addr, +                                                                     VAddr cpu_addr, +                                                                     const ImageBase& overlap, +                                                                     bool strict_size) { +    const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr); +    if (!base) { +        return std::nullopt; +    } +    const ImageInfo& info = overlap.info; +    if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) { +        return std::nullopt; +    } +    if (new_info.block != MipBlockSize(info, base->level)) { +        return std::nullopt; +    } +    const SubresourceExtent resources = new_info.resources; +    s32 layers = 1; +    if (info.type != ImageType::e3D) { +        layers = std::max(resources.layers, info.resources.layers + base->layer); +    } +    return OverlapResult{ +        .gpu_addr = overlap.gpu_addr, +        .cpu_addr = overlap.cpu_addr, +        .resources = +            { +                .levels = std::max(resources.levels + base->level, info.resources.levels), +                .layers = layers, +            }, +    }; +} + +[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { +    // 
https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 +    static constexpr u32 STRIDE_ALIGNMENT = 32; +    ASSERT(info.type == ImageType::Linear); +    const Extent2D num_tiles{ +        .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), +        .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), +    }; +    const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); +    return Extent2D{ +        .width = Common::AlignUp(num_tiles.width, width_alignment), +        .height = num_tiles.height, +    }; +} + +[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) { +    // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176 +    ASSERT(info.type != ImageType::Linear); +    const Extent3D size = AdjustMipSize(info.size, level); +    const Extent3D num_tiles{ +        .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)), +        .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)), +        .depth = size.depth, +    }; +    const u32 bpp_log2 = BytesPerBlockLog2(info.format); +    const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); +    const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); +    return Extent3D{ +        .width = Common::AlignBits(num_tiles.width, alignment), +        .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), +        .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), +    }; +} + +[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { +    u32 num_blocks = 0; +    for (s32 level = 0; level < info.resources.levels; ++level) { +        const Extent3D mip_size = AdjustMipSize(info.size, level); +        
num_blocks += NumBlocks(mip_size, tile_size); +    } +    return num_blocks; +} + +[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { +    ASSERT(info.type == ImageType::e3D); +    u32 num_slices = 0; +    for (s32 level = 0; level < info.resources.levels; ++level) { +        num_slices += AdjustMipSize(info.size.depth, level); +    } +    return num_slices; +} + +void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, +                             const ImageInfo& info, const BufferImageCopy& copy, +                             std::span<const u8> memory) { +    ASSERT(copy.image_offset.z == 0); +    ASSERT(copy.image_extent.depth == 1); +    ASSERT(copy.image_subresource.base_level == 0); +    ASSERT(copy.image_subresource.base_layer == 0); +    ASSERT(copy.image_subresource.num_layers == 1); + +    const u32 bytes_per_block = BytesPerBlock(info.format); +    const u32 row_length = copy.image_extent.width * bytes_per_block; +    const u32 guest_offset_x = copy.image_offset.x * bytes_per_block; + +    for (u32 line = 0; line < copy.image_extent.height; ++line) { +        const u32 host_offset_y = line * info.pitch; +        const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch; +        const u32 guest_offset = guest_offset_x + guest_offset_y; +        gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y, +                                    row_length); +    } +} + +void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, +                             const ImageInfo& info, const BufferImageCopy& copy, +                             std::span<const u8> input) { +    const Extent3D size = info.size; +    const LevelInfo level_info = MakeLevelInfo(info); +    const Extent2D tile_size = DefaultBlockSize(info.format); +    const u32 bytes_per_block = BytesPerBlock(info.format); + +    const s32 level = copy.image_subresource.base_level; +    const Extent3D 
level_size = AdjustMipSize(size, level); +    const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); +    const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; + +    UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + +    UNIMPLEMENTED_IF(copy.image_offset.x != 0); +    UNIMPLEMENTED_IF(copy.image_offset.y != 0); +    UNIMPLEMENTED_IF(copy.image_offset.z != 0); +    UNIMPLEMENTED_IF(copy.image_extent != level_size); + +    const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); +    const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + +    size_t host_offset = copy.buffer_offset; + +    const u32 num_levels = info.resources.levels; +    const std::array sizes = CalculateLevelSizes(level_info, num_levels); +    size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0); +    const size_t layer_stride = +        AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size, +                       level_info.block, tile_size.height, info.tile_width_spacing); +    const size_t subresource_size = sizes[level]; + +    const auto dst_data = std::make_unique<u8[]>(subresource_size); +    const std::span<u8> dst(dst_data.get(), subresource_size); + +    for (s32 layer = 0; layer < info.resources.layers; ++layer) { +        const std::span<const u8> src = input.subspan(host_offset); +        SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, +                       num_tiles.depth, block.height, block.depth); + +        gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + +        host_offset += host_bytes_per_layer; +        guest_offset += layer_stride; +    } +    ASSERT(host_offset - copy.buffer_offset == copy.buffer_size); +} + +} // Anonymous namespace + +u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { +    if (info.type == ImageType::Buffer) { +        return info.size.width * 
BytesPerBlock(info.format); +    } +    if (info.type == ImageType::Linear) { +        return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); +    } +    if (info.resources.layers > 1) { +        ASSERT(info.layer_stride != 0); +        return info.layer_stride * info.resources.layers; +    } else { +        return CalculateLayerSize(info); +    } +} + +u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { +    if (info.type == ImageType::Buffer) { +        return info.size.width * BytesPerBlock(info.format); +    } +    if (info.num_samples > 1) { +        // Multisample images can't be uploaded or downloaded to the host +        return 0; +    } +    if (info.type == ImageType::Linear) { +        return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); +    } +    const Extent2D tile_size = DefaultBlockSize(info.format); +    return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); +} + +u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { +    if (info.type == ImageType::Buffer) { +        return info.size.width * BytesPerBlock(info.format); +    } +    static constexpr Extent2D TILE_SIZE{1, 1}; +    return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; +} + +u32 CalculateLayerStride(const ImageInfo& info) noexcept { +    ASSERT(info.type != ImageType::Linear); +    const u32 layer_size = CalculateLayerSize(info); +    const Extent3D size = info.size; +    const Extent3D block = info.block; +    const u32 tile_size_y = DefaultBlockHeight(info.format); +    return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); +} + +u32 CalculateLayerSize(const ImageInfo& info) noexcept { +    ASSERT(info.type != ImageType::Linear); +    return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples, +                                info.tile_width_spacing, 
info.resources.levels); +} + +std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept { +    ASSERT(info.resources.levels <= MAX_MIP_LEVELS); +    const LevelInfo level_info = MakeLevelInfo(info); +    std::array<u32, MAX_MIP_LEVELS> offsets{}; +    u32 offset = 0; +    for (s32 level = 0; level < info.resources.levels; ++level) { +        offsets[level] = offset; +        offset += CalculateLevelSize(level_info, level); +    } +    return offsets; +} + +std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { +    ASSERT(info.type == ImageType::e3D); +    std::vector<u32> offsets; +    offsets.reserve(NumSlices(info)); + +    const LevelInfo level_info = MakeLevelInfo(info); +    u32 mip_offset = 0; +    for (s32 level = 0; level < info.resources.levels; ++level) { +        const Extent3D tile_shift = TileShift(level_info, level); +        const Extent3D tiles = LevelTiles(level_info, level); +        const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT; +        const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift; +        const u32 z_mask = (1U << tile_shift.depth) - 1; +        const u32 depth = AdjustMipSize(info.size.depth, level); +        for (u32 slice = 0; slice < depth; ++slice) { +            const u32 z_low = slice & z_mask; +            const u32 z_high = slice & ~z_mask; +            offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size)); +        } +        mip_offset += CalculateLevelSize(level_info, level); +    } +    return offsets; +} + +std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { +    ASSERT(info.type == ImageType::e3D); +    std::vector<SubresourceBase> subresources; +    subresources.reserve(NumSlices(info)); +    for (s32 level = 0; level < info.resources.levels; ++level) { +        const s32 depth = AdjustMipSize(info.size.depth, level); +        for (s32 slice = 0; slice < depth; ++slice) { +            
subresources.emplace_back(SubresourceBase{ +                .level = level, +                .layer = slice, +            }); +        } +    } +    return subresources; +} + +u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { +    const Extent2D tile_size = DefaultBlockSize(info.format); +    const Extent3D level_size = AdjustMipSize(info.size, level); +    const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); +    const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); +    const u32 bpp_log2 = BytesPerBlockLog2(info.format); +    return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); +} + +PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept { +    return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, +                                      config.a_type, config.srgb_conversion); +} + +ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { +    switch (info.type) { +    case ImageType::e2D: +        return info.resources.layers > 1 ? 
ImageViewType::e2DArray : ImageViewType::e2D; +    case ImageType::e3D: +        return ImageViewType::e2DArray; +    case ImageType::Linear: +        return ImageViewType::e2D; +    default: +        UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type)); +        return ImageViewType{}; +    } +} + +std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, +                                             SubresourceBase base) { +    ASSERT(dst.resources.levels >= src.resources.levels); +    ASSERT(dst.num_samples == src.num_samples); + +    const bool is_dst_3d = dst.type == ImageType::e3D; +    if (is_dst_3d) { +        ASSERT(src.type == ImageType::e3D); +        ASSERT(src.resources.levels == 1); +    } + +    std::vector<ImageCopy> copies; +    copies.reserve(src.resources.levels); +    for (s32 level = 0; level < src.resources.levels; ++level) { +        ImageCopy& copy = copies.emplace_back(); +        copy.src_subresource = SubresourceLayers{ +            .base_level = level, +            .base_layer = 0, +            .num_layers = src.resources.layers, +        }; +        copy.dst_subresource = SubresourceLayers{ +            .base_level = base.level + level, +            .base_layer = is_dst_3d ? 0 : base.layer, +            .num_layers = is_dst_3d ? 1 : src.resources.layers, +        }; +        copy.src_offset = Offset3D{ +            .x = 0, +            .y = 0, +            .z = 0, +        }; +        copy.dst_offset = Offset3D{ +            .x = 0, +            .y = 0, +            .z = is_dst_3d ? 
base.layer : 0, +        }; +        const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level); +        copy.extent = AdjustSamplesSize(mip_size, dst.num_samples); +        if (is_dst_3d) { +            copy.extent.depth = src.size.depth; +        } +    } +    return copies; +} + +bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { +    if (config.Address() == 0) { +        return false; +    } +    if (config.Address() > (u64(1) << 48)) { +        return false; +    } +    return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); +} + +std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, +                                            const ImageInfo& info, std::span<u8> output) { +    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); +    const u32 bpp_log2 = BytesPerBlockLog2(info.format); +    const Extent3D size = info.size; + +    if (info.type == ImageType::Linear) { +        gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); + +        ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); +        return {{ +            .buffer_offset = 0, +            .buffer_size = guest_size_bytes, +            .buffer_row_length = info.pitch >> bpp_log2, +            .buffer_image_height = size.height, +            .image_subresource = +                { +                    .base_level = 0, +                    .base_layer = 0, +                    .num_layers = 1, +                }, +            .image_offset = {0, 0, 0}, +            .image_extent = size, +        }}; +    } +    const auto input_data = std::make_unique<u8[]>(guest_size_bytes); +    gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); +    const std::span<const u8> input(input_data.get(), guest_size_bytes); + +    const LevelInfo level_info = MakeLevelInfo(info); +    const s32 num_layers = info.resources.layers; +    const s32 num_levels = 
info.resources.levels; +    const Extent2D tile_size = DefaultBlockSize(info.format); +    const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); +    const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); +    const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0); +    const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height, +                                            info.tile_width_spacing); +    size_t guest_offset = 0; +    u32 host_offset = 0; +    std::vector<BufferImageCopy> copies(num_levels); + +    for (s32 level = 0; level < num_levels; ++level) { +        const Extent3D level_size = AdjustMipSize(size, level); +        const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); +        const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2; +        copies[level] = BufferImageCopy{ +            .buffer_offset = host_offset, +            .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers, +            .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width), +            .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height), +            .image_subresource = +                { +                    .base_level = level, +                    .base_layer = 0, +                    .num_layers = info.resources.layers, +                }, +            .image_offset = {0, 0, 0}, +            .image_extent = level_size, +        }; +        const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); +        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); +        const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); +        size_t guest_layer_offset = 0; + +        for (s32 layer = 0; layer < info.resources.layers; ++layer) { +            const std::span<u8> dst = output.subspan(host_offset); +   
         const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset); +            UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height, +                             num_tiles.depth, block.height, block.depth, stride_alignment); +            guest_layer_offset += layer_stride; +            host_offset += host_bytes_per_layer; +        } +        guest_offset += level_sizes[level]; +    } +    return copies; +} + +BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, +                            const ImageBase& image, std::span<u8> output) { +    gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); +    return BufferCopy{ +        .src_offset = 0, +        .dst_offset = 0, +        .size = image.guest_size_bytes, +    }; +} + +void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, +                  std::span<BufferImageCopy> copies) { +    u32 output_offset = 0; + +    const Extent2D tile_size = DefaultBlockSize(info.format); +    for (BufferImageCopy& copy : copies) { +        const u32 level = copy.image_subresource.base_level; +        const Extent3D mip_size = AdjustMipSize(info.size, level); +        ASSERT(copy.image_offset == Offset3D{}); +        ASSERT(copy.image_subresource.base_layer == 0); +        ASSERT(copy.image_extent == mip_size); +        ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); +        ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); + +        if (IsPixelFormatASTC(info.format)) { +            ASSERT(copy.image_extent.depth == 1); +            Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), +                                             copy.image_extent.width, copy.image_extent.height, +                                             copy.image_subresource.num_layers, tile_size.width, +                               
              tile_size.height, output.subspan(output_offset)); +        } else { +            DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, +                          output.subspan(output_offset)); +        } +        copy.buffer_offset = output_offset; +        copy.buffer_row_length = mip_size.width; +        copy.buffer_image_height = mip_size.height; + +        output_offset += copy.image_extent.width * copy.image_extent.height * +                         copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; +    } +} + +std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { +    const Extent3D size = info.size; +    const u32 bytes_per_block = BytesPerBlock(info.format); +    if (info.type == ImageType::Linear) { +        ASSERT(info.pitch % bytes_per_block == 0); +        return {{ +            .buffer_offset = 0, +            .buffer_size = static_cast<size_t>(info.pitch) * size.height, +            .buffer_row_length = info.pitch / bytes_per_block, +            .buffer_image_height = size.height, +            .image_subresource = +                { +                    .base_level = 0, +                    .base_layer = 0, +                    .num_layers = 1, +                }, +            .image_offset = {0, 0, 0}, +            .image_extent = size, +        }}; +    } +    UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + +    const s32 num_layers = info.resources.layers; +    const s32 num_levels = info.resources.levels; +    const Extent2D tile_size = DefaultBlockSize(info.format); + +    u32 host_offset = 0; + +    std::vector<BufferImageCopy> copies(num_levels); +    for (s32 level = 0; level < num_levels; ++level) { +        const Extent3D level_size = AdjustMipSize(size, level); +        const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); +        const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers; +        copies[level] = BufferImageCopy{ +         
   .buffer_offset = host_offset, +            .buffer_size = host_bytes_per_level, +            .buffer_row_length = level_size.width, +            .buffer_image_height = level_size.height, +            .image_subresource = +                { +                    .base_level = level, +                    .base_layer = 0, +                    .num_layers = info.resources.layers, +                }, +            .image_offset = {0, 0, 0}, +            .image_extent = level_size, +        }; +        host_offset += host_bytes_per_level; +    } +    return copies; +} + +Extent3D MipSize(Extent3D size, u32 level) { +    return AdjustMipSize(size, level); +} + +Extent3D MipBlockSize(const ImageInfo& info, u32 level) { +    const LevelInfo level_info = MakeLevelInfo(info); +    const Extent2D tile_size = DefaultBlockSize(info.format); +    const Extent3D level_size = AdjustMipSize(info.size, level); +    const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); +    return AdjustMipBlockSize(num_tiles, level_info.block, level); +} + +std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { +    const Extent2D tile_size = DefaultBlockSize(info.format); +    if (info.type == ImageType::Linear) { +        return std::vector{SwizzleParameters{ +            .num_tiles = AdjustTileSize(info.size, tile_size), +            .block = {}, +            .buffer_offset = 0, +            .level = 0, +        }}; +    } +    const LevelInfo level_info = MakeLevelInfo(info); +    const Extent3D size = info.size; +    const s32 num_levels = info.resources.levels; + +    u32 guest_offset = 0; +    std::vector<SwizzleParameters> params(num_levels); +    for (s32 level = 0; level < num_levels; ++level) { +        const Extent3D level_size = AdjustMipSize(size, level); +        const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); +        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); +        params[level] = 
SwizzleParameters{ +            .num_tiles = num_tiles, +            .block = block, +            .buffer_offset = guest_offset, +            .level = level, +        }; +        guest_offset += CalculateLevelSize(level_info, level); +    } +    return params; +} + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, +                  std::span<const BufferImageCopy> copies, std::span<const u8> memory) { +    const bool is_pitch_linear = info.type == ImageType::Linear; +    for (const BufferImageCopy& copy : copies) { +        if (is_pitch_linear) { +            SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); +        } else { +            SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); +        } +    } +} + +bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, +                                 u32 rhs_level, bool strict_size) noexcept { +    ASSERT(lhs.type != ImageType::Linear); +    ASSERT(rhs.type != ImageType::Linear); +    if (strict_size) { +        const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); +        const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); +        return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; +    } else { +        const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); +        const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); +        return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; +    } +} + +bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { +    ASSERT(lhs.type == ImageType::Linear); +    ASSERT(rhs.type == ImageType::Linear); +    if (strict_size) { +        return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; +    } else { +        const Extent2D lhs_size = PitchLinearAlignedSize(lhs); +        const Extent2D rhs_size = 
PitchLinearAlignedSize(rhs); +        return lhs_size == rhs_size; +    } +} + +std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, +                                            VAddr cpu_addr, const ImageBase& overlap, +                                            bool strict_size) { +    ASSERT(new_info.type != ImageType::Linear); +    ASSERT(overlap.info.type != ImageType::Linear); +    if (!IsLayerStrideCompatible(new_info, overlap.info)) { +        return std::nullopt; +    } +    if (!IsViewCompatible(overlap.info.format, new_info.format)) { +        return std::nullopt; +    } +    if (gpu_addr == overlap.gpu_addr) { +        const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size); +        if (!solution) { +            return std::nullopt; +        } +        return OverlapResult{ +            .gpu_addr = gpu_addr, +            .cpu_addr = cpu_addr, +            .resources = *solution, +        }; +    } +    if (overlap.gpu_addr > gpu_addr) { +        return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); +    } +    // if overlap.gpu_addr < gpu_addr +    return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); +} + +bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { +    // If either of the layer strides is zero, we can assume they are compatible +    // These images generally come from rendertargets +    if (lhs.layer_stride == 0) { +        return true; +    } +    if (rhs.layer_stride == 0) { +        return true; +    } +    // It's definitely compatible if the layer stride matches +    if (lhs.layer_stride == rhs.layer_stride) { +        return true; +    } +    // Although we also have to compare for cases where it can be unaligned +    // This can happen if the image doesn't have layers, so the stride is not aligned +    if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { 
+        return true; +    } +    return false; +} + +std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, +                                               GPUVAddr candidate_addr, RelaxedOptions options) { +    const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); +    if (!base) { +        return std::nullopt; +    } +    const ImageInfo& existing = image.info; +    if (False(options & RelaxedOptions::Format)) { +        if (!IsViewCompatible(existing.format, candidate.format)) { +            return std::nullopt; +        } +    } +    if (!IsLayerStrideCompatible(existing, candidate)) { +        return std::nullopt; +    } +    if (existing.type != candidate.type) { +        return std::nullopt; +    } +    if (False(options & RelaxedOptions::Samples)) { +        if (existing.num_samples != candidate.num_samples) { +            return std::nullopt; +        } +    } +    if (existing.resources.levels < candidate.resources.levels + base->level) { +        return std::nullopt; +    } +    if (existing.type == ImageType::e3D) { +        const u32 mip_depth = std::max(1U, existing.size.depth << base->level); +        if (mip_depth < candidate.size.depth + base->layer) { +            return std::nullopt; +        } +    } else { +        if (existing.resources.layers < candidate.resources.layers + base->layer) { +            return std::nullopt; +        } +    } +    const bool strict_size = False(options & RelaxedOptions::Size); +    if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { +        return std::nullopt; +    } +    // TODO: compare block sizes +    return base; +} + +bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, +                   RelaxedOptions options) { +    return FindSubresource(candidate, image, candidate_addr, options).has_value(); +} + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& 
src_info, const ImageBase* dst, +                      const ImageBase* src) { +    if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { +        src_info.format = src->info.format; +    } +    if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { +        dst_info.format = dst->info.format; +    } +    if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { +        dst_info.format = src->info.format; +    } +    if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { +        src_info.format = src->info.format; +    } +} + +u32 MapSizeBytes(const ImageBase& image) { +    if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { +        return image.guest_size_bytes; +    } else if (True(image.flags & ImageFlagBits::Converted)) { +        return image.converted_size_bytes; +    } else { +        return image.unswizzled_size_bytes; +    } +} + +using P = PixelFormat; + +static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); + +static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00); +static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) == +              0x50d200); + +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000); 
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800); + +constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, +                                u32 tile_width_spacing, u32 level) { +    const Extent3D size{width, height, 1}; +    const Extent3D block{0, block_height, 0}; +    const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); +    return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); +} + +static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800); +static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000); +static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000); + +static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000, +              "Tile width spacing is not working"); +static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000, +              "Compressed tile width spacing is not working"); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..dbbbd33cd --- /dev/null +++ b/src/video_core/texture_cache/util.h @@ -0,0 +1,107 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <optional> +#include <span> + +#include "common/common_types.h" + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; + +struct OverlapResult { +    GPUVAddr gpu_addr; +    VAddr cpu_addr; +    SubresourceExtent resources; +}; + +[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept; + +[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets( +    const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); + +[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); + +[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); + +[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC( +    const Tegra::Texture::TICEntry& config) noexcept; + +[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, +                                                           const ImageInfo& src, +                                                           SubresourceBase base); + +[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); + +[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, +                                    
                      GPUVAddr gpu_addr, const ImageInfo& info, +                                                          std::span<u8> output); + +[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, +                                          const ImageBase& image, std::span<u8> output); + +void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, +                  std::span<BufferImageCopy> copies); + +[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); + +[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); + +[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); + +[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, +                  std::span<const BufferImageCopy> copies, std::span<const u8> memory); + +[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, +                                               const ImageInfo& overlap_info, u32 new_level, +                                               u32 overlap_level, bool strict_size) noexcept; + +[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, +                                         bool strict_size) noexcept; + +[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, +                                                          GPUVAddr gpu_addr, VAddr cpu_addr, +                                                          const ImageBase& overlap, +                                                          bool strict_size); + +[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); + +[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, +                                                             
const ImageBase& image, +                                                             GPUVAddr candidate_addr, +                                                             RelaxedOptions options); + +[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, +                                 GPUVAddr candidate_addr, RelaxedOptions options); + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, +                      const ImageBase* src); + +[[nodiscard]] u32 MapSizeBytes(const ImageBase& image); + +} // namespace VideoCommon diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -18,6 +18,7 @@  #include <algorithm>  #include <cassert>  #include <cstring> +#include <span>  #include <vector>  #include <boost/container/static_vector.hpp> @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {      return params;  } -static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, +static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,                                u32 blockHeight) {      // Don't actually care about the void extent, just read the bits...      
for (s32 i = 0; i < 4; ++i) { @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block      }  } -static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { +static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {      for (u32 j = 0; j < blockHeight; j++) {          for (u32 i = 0; i < blockWidth; i++) {              outBuf[j * blockWidth + i] = 0xFFFF00FF; @@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,  #undef READ_INT_VALUES  } -static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, -                            u32* outBuf) { -    InputBitStream strm(inBuf); +static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, +                            const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { +    InputBitStream strm(inBuf.data());      TexelWeightParams weightParams = DecodeBlockInfo(strm);      // Was there an error? @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32      }      // Read the texel weight data.. 
-    u8 texelWeightData[16]; -    memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); +    std::array<u8, 16> texelWeightData; +    std::ranges::copy(inBuf, texelWeightData.begin());      // Reverse everything      for (u32 i = 0; i < 8; i++) { @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32      // Make sure that higher non-texel bits are set to zero      const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; -    texelWeightData[clearByteStart - 1] = -        texelWeightData[clearByteStart - 1] & -        static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); -    memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); +    if (clearByteStart > 0) { +        texelWeightData[clearByteStart - 1] &= +            static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); +    } +    std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));      IntegerEncodedVector texelWeightValues; -    InputBitStream weightStream(texelWeightData); +    InputBitStream weightStream(texelWeightData.data());      DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,                            weightParams.GetNumWeightValues()); @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32  namespace Tegra::Texture::ASTC { -std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, -                           u32 block_height) { -    u32 blockIdx = 0; +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, +                uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { +    u32 block_index = 0;      std::size_t depth_offset = 0; -    std::vector<u8> outData(height * width * depth * 4); -    for (u32 k = 0; k < depth; k++) { -        for (u32 j = 0; j < height; j += 
block_height) { -            for (u32 i = 0; i < width; i += block_width) { - -                const u8* blockPtr = data + blockIdx * 16; +    for (u32 z = 0; z < depth; z++) { +        for (u32 y = 0; y < height; y += block_height) { +            for (u32 x = 0; x < width; x += block_width) { +                const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};                  // Blocks can be at most 12x12 -                u32 uncompData[144]; +                std::array<u32, 12 * 12> uncompData;                  ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); -                u32 decompWidth = std::min(block_width, width - i); -                u32 decompHeight = std::min(block_height, height - j); +                u32 decompWidth = std::min(block_width, width - x); +                u32 decompHeight = std::min(block_height, height - y); -                u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; +                const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);                  for (u32 jj = 0; jj < decompHeight; jj++) { -                    memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); +                    std::memcpy(outRow.data() + jj * width * 4, +                                uncompData.data() + jj * block_width, decompWidth * 4);                  } - -                blockIdx++; +                ++block_index;              }          }          depth_offset += height * width * 4;      } - -    return outData;  }  } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -5,11 +5,10 @@  #pragma once  #include <cstdint> -#include <vector>  namespace Tegra::Texture::ASTC { -std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, - 
                               uint32_t depth, uint32_t block_width, uint32_t block_height); +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, +                uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);  } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index bd1aebf02..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <cstring> -#include <tuple> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/surface.h" -#include "video_core/textures/astc.h" -#include "video_core/textures/convert.h" - -namespace Tegra::Texture { - -using VideoCore::Surface::PixelFormat; - -template <bool reverse> -void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { -    union S8Z24 { -        BitField<0, 24, u32> z24; -        BitField<24, 8, u32> s8; -    }; -    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); - -    union Z24S8 { -        BitField<0, 8, u32> s8; -        BitField<8, 24, u32> z24; -    }; -    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); - -    S8Z24 s8z24_pixel{}; -    Z24S8 z24s8_pixel{}; -    constexpr auto bpp{ -        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; -    for (std::size_t y = 0; y < height; ++y) { -        for (std::size_t x = 0; x < width; ++x) { -            const std::size_t offset{bpp * (y * width + x)}; -            if constexpr (reverse) { -                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); -                s8z24_pixel.s8.Assign(z24s8_pixel.s8); -                
s8z24_pixel.z24.Assign(z24s8_pixel.z24); -                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); -            } else { -                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); -                z24s8_pixel.s8.Assign(s8z24_pixel.s8); -                z24s8_pixel.z24.Assign(s8z24_pixel.z24); -                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); -            } -        } -    } -} - -static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { -    SwapS8Z24ToZ24S8<false>(data, width, height); -} - -static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { -    SwapS8Z24ToZ24S8<true>(data, width, height); -} - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, -                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { -    if (convert_astc && IsPixelFormatASTC(pixel_format)) { -        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 
-        u32 block_width{}; -        u32 block_height{}; -        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); -        const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( -            in_data, width, height, depth, block_width, block_height); -        std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - -    } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { -        Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); -    } -} - -void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, -                            bool convert_astc, bool convert_s8z24) { -    if (convert_astc && IsPixelFormatASTC(pixel_format)) { -        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", -                     pixel_format); -        UNREACHABLE(); - -    } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { -        Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); -    } -} - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCore::Surface { -enum class PixelFormat; -} - -namespace Tegra::Texture { - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, -                            u32 width, u32 height, u32 depth, bool convert_astc, -                            bool convert_s8z24); - -void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, -                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24); - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -2,204 +2,111 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <array>  #include <cmath>  #include <cstring> +#include <span> +#include <utility> +  #include "common/alignment.h"  #include "common/assert.h"  #include "common/bit_util.h" +#include "common/div_ceil.h"  #include "video_core/gpu.h"  #include "video_core/textures/decoders.h"  #include "video_core/textures/texture.h"  namespace Tegra::Texture { -namespace { +namespace {  /** - * This table represents the internal swizzle of a gob, - * in format 16 bytes x 2 sector packing. + * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.   * Calculates the offset of an (x, y) position within a swizzled texture.   * Taken from the Tegra X1 Technical Reference Manual. 
pages 1187-1188   */ -template <std::size_t N, std::size_t M, u32 Align> -struct alignas(64) SwizzleTable { -    static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); -    constexpr SwizzleTable() { -        for (u32 y = 0; y < N; ++y) { -            for (u32 x = 0; x < M; ++x) { -                const u32 x2 = x * Align; -                values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + -                                                ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); -            } +constexpr SwizzleTable MakeSwizzleTableConst() { +    SwizzleTable table{}; +    for (u32 y = 0; y < table.size(); ++y) { +        for (u32 x = 0; x < table[0].size(); ++x) { +            table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + +                          (y % 2) * 16 + (x % 16);          }      } -    const std::array<u16, M>& operator[](std::size_t index) const { -        return values[index]; -    } -    std::array<std::array<u16, M>, N> values{}; -}; +    return table; +} -constexpr u32 FAST_SWIZZLE_ALIGN = 16; +constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); -constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); -constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); +template <bool TO_LINEAR> +void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, +             u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { +    // The origin of the transformation can be configured here, leave it as zero as the current API +    // doesn't expose it. +    static constexpr u32 origin_x = 0; +    static constexpr u32 origin_y = 0; +    static constexpr u32 origin_z = 0; -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 
- * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. - */ -void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, -                         const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, -                         const u32 y_end, const u32 z_end, const u32 tile_offset, -                         const u32 xy_block_size, const u32 layer_z, const u32 stride_x, -                         const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { -    std::array<u8*, 2> data_ptrs; -    u32 z_address = tile_offset; - -    for (u32 z = z_start; z < z_end; z++) { -        u32 y_address = z_address; -        u32 pixel_base = layer_z * z + y_start * stride_x; -        for (u32 y = y_start; y < y_end; y++) { -            const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; -            for (u32 x = x_start; x < x_end; x++) { -                const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; -                const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; -                data_ptrs[unswizzle] = swizzled_data + swizzle_offset; -                data_ptrs[!unswizzle] = unswizzled_data + pixel_index; -                std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); -            } -            pixel_base += stride_x; -            if ((y + 1) % GOB_SIZE_Y == 0) -                y_address += GOB_SIZE; -        } -        z_address += xy_block_size; -    } -} +    // We can configure here a custom pitch +    // As it's not exposed 'width * bpp' will be the expected pitch. +    const u32 pitch = width * bytes_per_pixel; +    const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. 
Block_Width is assumed to be 1. - */ -void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, -                      const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, -                      const u32 y_end, const u32 z_end, const u32 tile_offset, -                      const u32 xy_block_size, const u32 layer_z, const u32 stride_x, -                      const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { -    std::array<u8*, 2> data_ptrs; -    u32 z_address = tile_offset; -    const u32 x_startb = x_start * bytes_per_pixel; -    const u32 x_endb = x_end * bytes_per_pixel; - -    for (u32 z = z_start; z < z_end; z++) { -        u32 y_address = z_address; -        u32 pixel_base = layer_z * z + y_start * stride_x; -        for (u32 y = y_start; y < y_end; y++) { -            const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; -            for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { -                const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; -                const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; -                const u32 pixel_index{out_x + pixel_base}; -                data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; -                data_ptrs[unswizzle ? 
0 : 1] = unswizzled_data + pixel_index; -                std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); -            } -            pixel_base += stride_x; -            if ((y + 1) % GOB_SIZE_Y == 0) -                y_address += GOB_SIZE; -        } -        z_address += xy_block_size; -    } -} +    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); +    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); +    const u32 slice_size = +        Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; -/** - * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. - * The body of this function takes care of splitting the swizzled texture into blocks, - * and managing the extents of it. Once all the parameters of a single block are obtained, - * the function calls 'ProcessBlock' to process that particular Block. - * - * Documentation for the memory layout and decoding can be found at: - *  https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces - */ -template <bool fast> -void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, -                  const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, -                  const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, -                  const u32 width_spacing) { -    auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; -    const u32 stride_x = width * out_bytes_per_pixel; -    const u32 layer_z = height * stride_x; -    const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; -    constexpr u32 gob_elements_y = GOB_SIZE_Y; -    constexpr u32 gob_elements_z = GOB_SIZE_Z; -    const u32 block_x_elements = gob_elements_x; -    const u32 block_y_elements = gob_elements_y * block_height; -    const u32 block_z_elements = gob_elements_z * block_depth; -    const u32 
aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); -    const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); -    const u32 blocks_on_y = div_ceil(height, block_y_elements); -    const u32 blocks_on_z = div_ceil(depth, block_z_elements); -    const u32 xy_block_size = GOB_SIZE * block_height; -    const u32 block_size = xy_block_size * block_depth; -    u32 tile_offset = 0; -    for (u32 zb = 0; zb < blocks_on_z; zb++) { -        const u32 z_start = zb * block_z_elements; -        const u32 z_end = std::min(depth, z_start + block_z_elements); -        for (u32 yb = 0; yb < blocks_on_y; yb++) { -            const u32 y_start = yb * block_y_elements; -            const u32 y_end = std::min(height, y_start + block_y_elements); -            for (u32 xb = 0; xb < blocks_on_x; xb++) { -                const u32 x_start = xb * block_x_elements; -                const u32 x_end = std::min(width, x_start + block_x_elements); -                if constexpr (fast) { -                    FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, -                                     z_start, x_end, y_end, z_end, tile_offset, xy_block_size, -                                     layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); -                } else { -                    PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, -                                        z_start, x_end, y_end, z_end, tile_offset, xy_block_size, -                                        layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); -                } -                tile_offset += block_size; +    const u32 block_height_mask = (1U << block_height) - 1; +    const u32 block_depth_mask = (1U << block_depth) - 1; +    const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; + +    for (u32 slice = 0; slice < depth; ++slice) { +        const u32 z = slice + origin_z; +        const u32 offset_z = 
(z >> block_depth) * slice_size + +                             ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); +        for (u32 line = 0; line < height; ++line) { +            const u32 y = line + origin_y; +            const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; + +            const u32 block_y = y >> GOB_SIZE_Y_SHIFT; +            const u32 offset_y = (block_y >> block_height) * block_size + +                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + +            for (u32 column = 0; column < width; ++column) { +                const u32 x = (column + origin_x) * bytes_per_pixel; +                const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; + +                const u32 base_swizzled_offset = offset_z + offset_y + offset_x; +                const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; + +                const u32 unswizzled_offset = +                    slice * pitch * height + line * pitch + column * bytes_per_pixel; + +                u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; +                const u8* const src = &input[TO_LINEAR ? 
unswizzled_offset : swizzled_offset]; +                std::memcpy(dst, src, bytes_per_pixel);              }          }      }  } -  } // Anonymous namespace -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, -                      u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, -                      bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { -    const u32 block_height_size{1U << block_height}; -    const u32 block_depth_size{1U << block_depth}; -    if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { -        SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, -                           bytes_per_pixel, out_bytes_per_pixel, block_height_size, -                           block_depth_size, width_spacing); -    } else { -        SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, -                            bytes_per_pixel, out_bytes_per_pixel, block_height_size, -                            block_depth_size, width_spacing); -    } +SwizzleTable MakeSwizzleTable() { +    return SWIZZLE_TABLE;  } -void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, -                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, -                      u32 block_depth, u32 width_spacing) { -    CopySwizzledData((width + tile_size_x - 1) / tile_size_x, -                     (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, -                     bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, -                     width_spacing); +void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, +                      u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, +                      u32 stride_alignment) { +    Swizzle<false>(output, 
input, bytes_per_pixel, width, height, depth, block_height, block_depth, +                   stride_alignment);  } -std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, -                                 u32 width, u32 height, u32 depth, u32 block_height, -                                 u32 block_depth, u32 width_spacing) { -    std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); -    UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, -                     width, height, depth, block_height, block_depth, width_spacing); -    return unswizzled_data; +void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, +                    u32 height, u32 depth, u32 block_height, u32 block_depth, +                    u32 stride_alignment) { +    Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, +                  stride_alignment);  }  void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32          const u32 gob_address_y =              (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +              ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; -        const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; +        const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];          for (u32 x = 0; x < subrect_width; ++x) {              const u32 dst_x = x + offset_x;              const u32 gob_address = @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,      const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);      const u32 block_height_mask = (1U << block_height) - 1; -    const u32 x_shift = 
static_cast<u32>(GOB_SIZE_SHIFT) + block_height; +    const u32 x_shift = GOB_SIZE_SHIFT + block_height;      for (u32 line = 0; line < line_count; ++line) {          const u32 src_y = line + origin_y; -        const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; +        const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];          const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;          const u32 src_offset_y = (block_y >> block_height) * block_size + @@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt      const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;      for (u32 line = 0; line < line_count; ++line) { -        const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; +        const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];          const u32 block_y = line / GOB_SIZE_Y;          const u32 dst_offset_y =              (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32          const std::size_t gob_address_y =              (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +              ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; -        const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; +        const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];          for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {              const std::size_t gob_address =                  gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -4,7 +4,8 @@  #pragma once -#include <vector> +#include <span> +  #include "common/common_types.h"  #include 
"video_core/textures/texture.h" @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;  constexpr u32 GOB_SIZE_Z = 1;  constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; -constexpr std::size_t GOB_SIZE_X_SHIFT = 6; -constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; -constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; -constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; - -/// Unswizzles a swizzled texture without changing its format. -void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, -                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth, -                      u32 block_height = TICEntry::DefaultBlockHeight, -                      u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); - -/// Unswizzles a swizzled texture without changing its format. -std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, -                                 u32 width, u32 height, u32 depth, -                                 u32 block_height = TICEntry::DefaultBlockHeight, -                                 u32 block_depth = TICEntry::DefaultBlockHeight, -                                 u32 width_spacing = 0); - -/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. 
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, -                      u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, -                      bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); +constexpr u32 GOB_SIZE_X_SHIFT = 6; +constexpr u32 GOB_SIZE_Y_SHIFT = 3; +constexpr u32 GOB_SIZE_Z_SHIFT = 0; +constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; + +/// Returns a z-order swizzle table +SwizzleTable MakeSwizzleTable(); + +/// Unswizzles a block linear texture into linear memory. +void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, +                      u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, +                      u32 stride_alignment = 1); + +/// Swizzles linear memory into a block linear texture. +void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, +                    u32 height, u32 depth, u32 block_height, u32 block_depth, +                    u32 stride_alignment = 1);  /// This function calculates the correct size of a texture depending if it's tiled or not.  
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -5,9 +5,13 @@  #include <algorithm>  #include <array> +#include "common/cityhash.h"  #include "core/settings.h"  #include "video_core/textures/texture.h" +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +  namespace Tegra::Texture {  namespace { @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {  } // Anonymous namespace -std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { +std::array<float, 4> TSCEntry::BorderColor() const noexcept {      if (!srgb_conversion) {          return border_color;      } @@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {              SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};  } -float TSCEntry::GetMaxAnisotropy() const noexcept { +float TSCEntry::MaxAnisotropy() const noexcept {      return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));  }  } // namespace Tegra::Texture + +size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept { +    return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic); +} + +size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept { +    return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc); +} diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bbc7e3eaf..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -53,27 +53,27 @@ enum class TextureFormat : u32 {      BC4 = 0x27,      BC5 = 0x28,      S8D24 = 0x29, -    X8Z24 = 0x2a, +    X8D24 = 0x2a,      D24S8 = 0x2b, -    X4V4Z24__COV4R4V = 0x2c, -    X4V4Z24__COV8R8V = 0x2d, -    
V8Z24__COV4R12V = 0x2e, +    X4V4D24__COV4R4V = 0x2c, +    X4V4D24__COV8R8V = 0x2d, +    V8D24__COV4R12V = 0x2e,      D32 = 0x2f,      D32S8 = 0x30, -    X8Z24_X20V4S8__COV4R4V = 0x31, -    X8Z24_X20V4S8__COV8R8V = 0x32, -    ZF32_X20V4X8__COV4R4V = 0x33, -    ZF32_X20V4X8__COV8R8V = 0x34, -    ZF32_X20V4S8__COV4R4V = 0x35, -    ZF32_X20V4S8__COV8R8V = 0x36, -    X8Z24_X16V8S8__COV4R12V = 0x37, -    ZF32_X16V8X8__COV4R12V = 0x38, -    ZF32_X16V8S8__COV4R12V = 0x39, +    X8D24_X20V4S8__COV4R4V = 0x31, +    X8D24_X20V4S8__COV8R8V = 0x32, +    D32_X20V4X8__COV4R4V = 0x33, +    D32_X20V4X8__COV8R8V = 0x34, +    D32_X20V4S8__COV4R4V = 0x35, +    D32_X20V4S8__COV8R8V = 0x36, +    X8D24_X16V8S8__COV4R12V = 0x37, +    D32_X16V8X8__COV4R12V = 0x38, +    D32_X16V8S8__COV4R12V = 0x39,      D16 = 0x3a, -    V8Z24__COV8R24V = 0x3b, -    X8Z24_X16V8S8__COV8R24V = 0x3c, -    ZF32_X16V8X8__COV8R24V = 0x3d, -    ZF32_X16V8S8__COV8R24V = 0x3e, +    V8D24__COV8R24V = 0x3b, +    X8D24_X16V8S8__COV8R24V = 0x3c, +    D32_X16V8X8__COV8R24V = 0x3d, +    D32_X16V8S8__COV8R24V = 0x3e,      ASTC_2D_4X4 = 0x40,      ASTC_2D_5X5 = 0x41,      ASTC_2D_6X6 = 0x42, @@ -146,7 +146,7 @@ enum class MsaaMode : u32 {  };  union TextureHandle { -    /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} +    /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}      u32 raw;      BitField<0, 20, u32> tic_id; @@ -155,124 +155,124 @@ union TextureHandle {  static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");  struct TICEntry { -    static constexpr u32 DefaultBlockHeight = 16; -    static constexpr u32 DefaultBlockDepth = 1; - -    union { -        u32 raw; -        BitField<0, 7, TextureFormat> format; -        BitField<7, 3, ComponentType> r_type; -        BitField<10, 3, ComponentType> g_type; -        BitField<13, 3, ComponentType> b_type; -        BitField<16, 3, ComponentType> a_type; - -        BitField<19, 3, SwizzleSource> x_source; -        BitField<22, 3, 
SwizzleSource> y_source; -        BitField<25, 3, SwizzleSource> z_source; -        BitField<28, 3, SwizzleSource> w_source; -    }; -    u32 address_low;      union { -        BitField<0, 16, u32> address_high; -        BitField<21, 3, TICHeaderVersion> header_version; -    }; -    union { -        BitField<0, 3, u32> block_width; -        BitField<3, 3, u32> block_height; -        BitField<6, 3, u32> block_depth; +        struct { +            union { +                BitField<0, 7, TextureFormat> format; +                BitField<7, 3, ComponentType> r_type; +                BitField<10, 3, ComponentType> g_type; +                BitField<13, 3, ComponentType> b_type; +                BitField<16, 3, ComponentType> a_type; + +                BitField<19, 3, SwizzleSource> x_source; +                BitField<22, 3, SwizzleSource> y_source; +                BitField<25, 3, SwizzleSource> z_source; +                BitField<28, 3, SwizzleSource> w_source; +            }; +            u32 address_low; +            union { +                BitField<0, 16, u32> address_high; +                BitField<16, 5, u32> layer_base_3_7; +                BitField<21, 3, TICHeaderVersion> header_version; +                BitField<24, 1, u32> load_store_hint; +                BitField<25, 4, u32> view_coherency_hash; +                BitField<29, 3, u32> layer_base_8_10; +            }; +            union { +                BitField<0, 3, u32> block_width; +                BitField<3, 3, u32> block_height; +                BitField<6, 3, u32> block_depth; -        BitField<10, 3, u32> tile_width_spacing; +                BitField<10, 3, u32> tile_width_spacing; -        // High 16 bits of the pitch value -        BitField<0, 16, u32> pitch_high; -        BitField<26, 1, u32> use_header_opt_control; -        BitField<27, 1, u32> depth_texture; -        BitField<28, 4, u32> max_mip_level; +                // High 16 bits of the pitch value +                BitField<0, 16, u32> 
pitch_high; +                BitField<26, 1, u32> use_header_opt_control; +                BitField<27, 1, u32> depth_texture; +                BitField<28, 4, u32> max_mip_level; -        BitField<0, 16, u32> buffer_high_width_minus_one; -    }; -    union { -        BitField<0, 16, u32> width_minus_1; -        BitField<22, 1, u32> srgb_conversion; -        BitField<23, 4, TextureType> texture_type; -        BitField<29, 3, u32> border_size; +                BitField<0, 16, u32> buffer_high_width_minus_one; +            }; +            union { +                BitField<0, 16, u32> width_minus_one; +                BitField<16, 3, u32> layer_base_0_2; +                BitField<22, 1, u32> srgb_conversion; +                BitField<23, 4, TextureType> texture_type; +                BitField<29, 3, u32> border_size; -        BitField<0, 16, u32> buffer_low_width_minus_one; -    }; -    union { -        BitField<0, 16, u32> height_minus_1; -        BitField<16, 14, u32> depth_minus_1; -    }; -    union { -        BitField<6, 13, u32> mip_lod_bias; -        BitField<27, 3, u32> max_anisotropy; +                BitField<0, 16, u32> buffer_low_width_minus_one; +            }; +            union { +                BitField<0, 16, u32> height_minus_1; +                BitField<16, 14, u32> depth_minus_1; +                BitField<30, 1, u32> is_sparse; +                BitField<31, 1, u32> normalized_coords; +            }; +            union { +                BitField<6, 13, u32> mip_lod_bias; +                BitField<27, 3, u32> max_anisotropy; +            }; +            union { +                BitField<0, 4, u32> res_min_mip_level; +                BitField<4, 4, u32> res_max_mip_level; +                BitField<8, 4, MsaaMode> msaa_mode; +                BitField<12, 12, u32> min_lod_clamp; +            }; +        }; +        std::array<u64, 4> raw;      }; -    union { -        BitField<0, 4, u32> res_min_mip_level; -        BitField<4, 4, u32> 
res_max_mip_level; -        BitField<8, 4, MsaaMode> msaa_mode; -        BitField<12, 12, u32> min_lod_clamp; -    }; +    constexpr bool operator==(const TICEntry& rhs) const noexcept { +        return raw == rhs.raw; +    } -    GPUVAddr Address() const { +    constexpr bool operator!=(const TICEntry& rhs) const noexcept { +        return raw != rhs.raw; +    } + +    constexpr GPUVAddr Address() const {          return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);      } -    u32 Pitch() const { +    constexpr u32 Pitch() const {          ASSERT(header_version == TICHeaderVersion::Pitch ||                 header_version == TICHeaderVersion::PitchColorKey);          // The pitch value is 21 bits, and is 32B aligned.          return pitch_high << 5;      } -    u32 Width() const { +    constexpr u32 Width() const {          if (header_version != TICHeaderVersion::OneDBuffer) { -            return width_minus_1 + 1; +            return width_minus_one + 1;          } -        return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; +        return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;      } -    u32 Height() const { +    constexpr u32 Height() const {          return height_minus_1 + 1;      } -    u32 Depth() const { +    constexpr u32 Depth() const {          return depth_minus_1 + 1;      } -    u32 BlockWidth() const { -        ASSERT(IsTiled()); -        return block_width; -    } - -    u32 BlockHeight() const { -        ASSERT(IsTiled()); -        return block_height; -    } - -    u32 BlockDepth() const { -        ASSERT(IsTiled()); -        return block_depth; +    constexpr u32 BaseLayer() const { +        return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;      } -    bool IsTiled() const { +    constexpr bool IsBlockLinear() const {          return header_version == TICHeaderVersion::BlockLinear ||                 header_version == 
TICHeaderVersion::BlockLinearColorKey;      } -    bool IsLineal() const { +    constexpr bool IsPitchLinear() const {          return header_version == TICHeaderVersion::Pitch ||                 header_version == TICHeaderVersion::PitchColorKey;      } -    bool IsBuffer() const { +    constexpr bool IsBuffer() const {          return header_version == TICHeaderVersion::OneDBuffer;      } - -    bool IsSrgbConversionEnabled() const { -        return srgb_conversion != 0; -    }  };  static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {      Linear = 3,  }; +enum class SamplerReduction : u32 { +    WeightedAverage = 0, +    Min = 1, +    Max = 2, +}; +  enum class Anisotropy {      Default,      Filter2x, @@ -333,8 +339,12 @@ struct TSCEntry {                  BitField<0, 2, TextureFilter> mag_filter;                  BitField<4, 2, TextureFilter> min_filter;                  BitField<6, 2, TextureMipmapFilter> mipmap_filter; +                BitField<8, 1, u32> cubemap_anisotropy;                  BitField<9, 1, u32> cubemap_interface_filtering; +                BitField<10, 2, SamplerReduction> reduction_filter;                  BitField<12, 13, u32> mip_lod_bias; +                BitField<25, 1, u32> float_coord_normalization; +                BitField<26, 5, u32> trilin_opt;              };              union {                  BitField<0, 12, u32> min_lod_clamp; @@ -347,32 +357,45 @@ struct TSCEntry {              };              std::array<f32, 4> border_color;          }; -        std::array<u8, 0x20> raw; +        std::array<u64, 4> raw;      }; -    std::array<float, 4> GetBorderColor() const noexcept; +    constexpr bool operator==(const TSCEntry& rhs) const noexcept { +        return raw == rhs.raw; +    } + +    constexpr bool operator!=(const TSCEntry& rhs) const noexcept { +        return raw != rhs.raw; +    } + +    std::array<float, 4> BorderColor() const noexcept; -    float 
GetMaxAnisotropy() const noexcept; +    float MaxAnisotropy() const noexcept; -    float GetMinLod() const { +    float MinLod() const {          return static_cast<float>(min_lod_clamp) / 256.0f;      } -    float GetMaxLod() const { +    float MaxLod() const {          return static_cast<float>(max_lod_clamp) / 256.0f;      } -    float GetLodBias() const { +    float LodBias() const {          // Sign extend the 13-bit value. -        constexpr u32 mask = 1U << (13 - 1); +        static constexpr u32 mask = 1U << (13 - 1);          return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;      }  };  static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); -struct FullTextureInfo { -    TICEntry tic; -    TSCEntry tsc; +} // namespace Tegra::Texture + +template <> +struct std::hash<Tegra::Texture::TICEntry> { +    size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;  }; -} // namespace Tegra::Texture +template <> +struct std::hash<Tegra::Texture::TSCEntry> { +    size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; +};  | 
