diff options
Diffstat (limited to 'src/video_core')
28 files changed, 698 insertions, 130 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91a30fef7..6a6325e38 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(host_shaders)  if(LIBVA_FOUND)      set_source_files_properties(command_classes/codecs/codec.cpp          PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) +    list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})  endif()  add_library(video_core STATIC diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 916277811..2a532b883 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -2,6 +2,8 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <algorithm> +#include <cstdio>  #include <fstream>  #include <vector>  #include "common/assert.h" @@ -15,12 +17,28 @@  extern "C" {  #include <libavutil/opt.h> +#ifdef LIBVA_FOUND +// for querying VAAPI driver information +#include <libavutil/hwcontext_vaapi.h> +#endif  }  namespace Tegra {  namespace {  constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;  constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; +constexpr std::array PREFERRED_GPU_DECODERS = { +    AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 +    AV_HWDEVICE_TYPE_D3D11VA, +    AV_HWDEVICE_TYPE_DXVA2, +#elif defined(__linux__) +    AV_HWDEVICE_TYPE_VAAPI, +    AV_HWDEVICE_TYPE_VDPAU, +#endif +    // last resort for Linux Flatpak (w/ NVIDIA) +    AV_HWDEVICE_TYPE_VULKAN, +};  void AVPacketDeleter(AVPacket* ptr) {      av_packet_free(&ptr); @@ -59,46 +77,50 @@ Codec::~Codec() {      av_buffer_unref(&av_gpu_decoder);  } +// List all the currently available hwcontext in ffmpeg +static std::vector<AVHWDeviceType> ListSupportedContexts() { +    std::vector<AVHWDeviceType> contexts{}; +    AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; +    do { +        current_device_type = av_hwdevice_iterate_types(current_device_type); +        contexts.push_back(current_device_type); +    } while (current_device_type != AV_HWDEVICE_TYPE_NONE); +    return contexts; +} +  bool Codec::CreateGpuAvDevice() { -#if defined(LIBVA_FOUND) -    static constexpr std::array<const char*, 3> VAAPI_DRIVERS = { -        "i915", -        "iHD", -        "amdgpu", -    }; -    AVDictionary* hwdevice_options = nullptr; -    av_dict_set(&hwdevice_options, "connection_type", "drm", 0); -    for (const auto& driver : VAAPI_DRIVERS) { -        av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); -        const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, -                                                          nullptr, hwdevice_options, 0); -        if (hwdevice_error >= 0) { -            LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); -            av_dict_free(&hwdevice_options); -            av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; -            return true; -        } -        LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); -    } -    LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); -    av_dict_free(&hwdevice_options); -#endif      static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; -    static constexpr std::array GPU_DECODER_TYPES{ -        AV_HWDEVICE_TYPE_CUDA, -#ifdef _WIN32 -        AV_HWDEVICE_TYPE_D3D11VA, -#else -        AV_HWDEVICE_TYPE_VDPAU, -#endif -    }; -    for (const auto& type : GPU_DECODER_TYPES) { +    static const auto supported_contexts = ListSupportedContexts(); +    for (const auto& type : PREFERRED_GPU_DECODERS) { +        if (std::none_of(supported_contexts.begin(), supported_contexts.end(), +                         [&type](const auto& context) { return context == type; })) { +            LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); +            continue; +        }          const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);          if (hwdevice_res < 0) {              LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",                        av_hwdevice_get_type_name(type), hwdevice_res);              continue;          } +#ifdef LIBVA_FOUND +        if (type == AV_HWDEVICE_TYPE_VAAPI) { +            // we need to determine if this is an impersonated VAAPI driver +            AVHWDeviceContext* hwctx = +                static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data)); +            AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx); +            const char* vendor_name = vaQueryVendorString(vactx->display); +            if (strstr(vendor_name, "VDPAU backend")) { +                // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them +                LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver"); +                continue; +            } else { +                // according to some user testing, certain vaapi driver (Intel?) could be buggy +                // so let's log the driver name which may help the developers/supporters +                LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name); +            } +        } +#endif          for (int i = 0;; i++) {              const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);              if (!config) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ab7c21a49..8788f5148 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -185,16 +185,6 @@ struct GPU::Impl {          return *dma_pusher;      } -    /// Returns a reference to the GPU CDMA pusher. -    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { -        return *cdma_pusher; -    } - -    /// Returns a const reference to the GPU CDMA pusher. -    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { -        return *cdma_pusher; -    } -      /// Returns a reference to the underlying renderer.      [[nodiscard]] VideoCore::RendererBase& Renderer() {          return *renderer; @@ -338,25 +328,27 @@ struct GPU::Impl {      }      /// Push GPU command buffer entries to be processed -    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { +    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {          if (!use_nvdec) {              return;          } -        if (!cdma_pusher) { -            cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu); +        if (!cdma_pushers.contains(id)) { +            cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu));          }          // SubmitCommandBuffer would make the nvdec operations async, this is not currently working          // TODO(ameerj): RE proper async nvdec operation          // gpu_thread.SubmitCommandBuffer(std::move(entries)); - -        cdma_pusher->ProcessEntries(std::move(entries)); +        cdma_pushers[id]->ProcessEntries(std::move(entries));      }      /// Frees the CDMAPusher instance to free up resources -    void ClearCdmaInstance() { -        cdma_pusher.reset(); +    void ClearCdmaInstance(u32 id) { +        const auto iter = cdma_pushers.find(id); +        if (iter != cdma_pushers.end()) { +            cdma_pushers.erase(iter); +        }      }      /// Swap buffers (render frame) @@ -659,7 +651,7 @@ struct GPU::Impl {      Core::System& system;      std::unique_ptr<Tegra::MemoryManager> memory_manager;      std::unique_ptr<Tegra::DmaPusher> dma_pusher; -    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; +    std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;      std::unique_ptr<VideoCore::RendererBase> renderer;      VideoCore::RasterizerInterface* rasterizer = nullptr;      const bool use_nvdec; @@ -811,14 +803,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const {      return impl->DmaPusher();  } -Tegra::CDmaPusher& GPU::CDmaPusher() { -    return impl->CDmaPusher(); -} - -const Tegra::CDmaPusher& GPU::CDmaPusher() const { -    return impl->CDmaPusher(); -} -  VideoCore::RendererBase& GPU::Renderer() {      return impl->Renderer();  } @@ -887,12 +871,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) {      impl->PushGPUEntries(std::move(entries));  } -void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { -    impl->PushCommandBuffer(entries); +void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { +    impl->PushCommandBuffer(id, entries);  } -void GPU::ClearCdmaInstance() { -    impl->ClearCdmaInstance(); +void GPU::ClearCdmaInstance(u32 id) { +    impl->ClearCdmaInstance(id);  }  void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 05e5c94f3..500411176 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -83,6 +83,7 @@ enum class DepthFormat : u32 {      S8_UINT_Z24_UNORM = 0x14,      D24X8_UNORM = 0x15,      D24S8_UNORM = 0x16, +    S8_UINT = 0x17,      D24C8_UNORM = 0x18,      D32_FLOAT_S8X24_UINT = 0x19,  }; @@ -241,10 +242,10 @@ public:      void PushGPUEntries(Tegra::CommandList&& entries);      /// Push GPU command buffer entries to be processed -    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); +    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);      /// Frees the CDMAPusher instance to free up resources -    void ClearCdmaInstance(); +    void ClearCdmaInstance(u32 id);      /// Swap buffers (render frame)      void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index d779a967a..fd3e41434 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -10,6 +10,8 @@ set(SHADER_FILES      astc_decoder.comp      block_linear_unswizzle_2d.comp      block_linear_unswizzle_3d.comp +    convert_abgr8_to_d24s8.frag +    convert_d24s8_to_abgr8.frag      convert_depth_to_float.frag      convert_float_to_depth.frag      full_screen_triangle.vert diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag new file mode 100644 index 000000000..ea055ddad --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { +    ivec2 coord = ivec2(gl_FragCoord.xy); +    uvec4 color = uvec4(texelFetch(color_texture, coord, 0).abgr * (exp2(8) - 1.0f)); +    uvec4 bytes = color << uvec4(24, 16, 8, 0); +    uint depth_stencil_unorm = bytes.x | bytes.y | bytes.z | bytes.w; + +    gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); +    gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag new file mode 100644 index 000000000..94368fb59 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { +    ivec2 coord = ivec2(gl_FragCoord.xy); +    uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); +    uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + +    highp uint depth_val = +        uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); +    lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; +    highp uvec4 components = +        uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); +    color.abgr = vec4(components) / (exp2(8.0) - 1.0); +} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6956535e5..14e6522f2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,7 @@  #include <glad/glad.h> +#include "common/bit_util.h"  #include "common/literals.h"  #include "common/settings.h"  #include "video_core/renderer_opengl/gl_device.h" @@ -148,6 +149,8 @@ GLenum AttachmentType(PixelFormat format) {      switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {      case SurfaceType::Depth:          return GL_DEPTH_ATTACHMENT; +    case SurfaceType::Stencil: +        return GL_STENCIL_ATTACHMENT;      case SurfaceType::DepthStencil:          return GL_DEPTH_STENCIL_ATTACHMENT;      default: @@ -317,13 +320,12 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {      }  } -OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { +OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format, +                     GLsizei gl_num_levels) {      const GLenum target = ImageTarget(info);      const GLsizei width = info.size.width;      const GLsizei height = info.size.height;      const GLsizei depth = info.size.depth; -    const int max_host_mip_levels = std::bit_width(info.size.width); -    const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);      const GLsizei num_layers = info.resources.layers;      const GLsizei num_samples = info.num_samples; @@ -335,10 +337,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form      }      switch (target) {      case GL_TEXTURE_1D_ARRAY: -        glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); +        glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, num_layers);          break;      case GL_TEXTURE_2D_ARRAY: -        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); +        glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, num_layers);          break;      case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {          // TODO: Where should 'fixedsamplelocations' come from? @@ -348,10 +350,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form          break;      }      case GL_TEXTURE_RECTANGLE: -        glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); +        glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, height);          break;      case GL_TEXTURE_3D: -        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); +        glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, depth);          break;      case GL_TEXTURE_BUFFER:          UNREACHABLE(); @@ -397,9 +399,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form      return GL_R32UI;  } -[[nodiscard]] u32 NextPow2(u32 value) { -    return 1U << (32U - std::countl_zero(value - 1U)); -}  } // Anonymous namespace  ImageBufferMap::~ImageBufferMap() { @@ -526,8 +525,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,      }  } -void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, -                                       std::span<const VideoCommon::ImageCopy> copies) { +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, +                                           std::span<const VideoCommon::ImageCopy> copies) {      LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);      format_conversion_pass.ConvertImage(dst, src, copies);  } @@ -696,7 +695,9 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,          gl_format = tuple.format;          gl_type = tuple.type;      } -    texture = MakeImage(info, gl_internal_format); +    const int max_host_mip_levels = std::bit_width(info.size.width); +    gl_num_levels = std::min(info.resources.levels, max_host_mip_levels); +    texture = MakeImage(info, gl_internal_format, gl_num_levels);      current_texture = texture.handle;      if (runtime->device.HasDebuggingToolAttached()) {          const std::string name = VideoCommon::Name(*this); @@ -724,6 +725,9 @@ void Image::UploadMemory(const ImageBufferMap& map,      u32 current_image_height = std::numeric_limits<u32>::max();      for (const VideoCommon::BufferImageCopy& copy : copies) { +        if (copy.image_subresource.base_level >= gl_num_levels) { +            continue; +        }          if (current_row_length != copy.buffer_row_length) {              current_row_length = copy.buffer_row_length;              glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); @@ -753,6 +757,9 @@ void Image::DownloadMemory(ImageBufferMap& map,      u32 current_image_height = std::numeric_limits<u32>::max();      for (const VideoCommon::BufferImageCopy& copy : copies) { +        if (copy.image_subresource.base_level >= gl_num_levels) { +            continue; +        }          if (current_row_length != copy.buffer_row_length) {              current_row_length = copy.buffer_row_length;              glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); @@ -792,7 +799,7 @@ GLuint Image::StorageHandle() noexcept {          }          store_view.Create();          glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, -                      info.resources.levels, 0, info.resources.layers); +                      gl_num_levels, 0, info.resources.layers);          return store_view.handle;      default:          return current_texture; @@ -907,6 +914,8 @@ void Image::Scale(bool up_scale) {              return GL_COLOR_ATTACHMENT0;          case SurfaceType::Depth:              return GL_DEPTH_ATTACHMENT; +        case SurfaceType::Stencil: +            return GL_STENCIL_ATTACHMENT;          case SurfaceType::DepthStencil:              return GL_DEPTH_STENCIL_ATTACHMENT;          default: @@ -920,8 +929,10 @@ void Image::Scale(bool up_scale) {              return GL_COLOR_BUFFER_BIT;          case SurfaceType::Depth:              return GL_DEPTH_BUFFER_BIT; +        case SurfaceType::Stencil: +            return GL_STENCIL_BUFFER_BIT;          case SurfaceType::DepthStencil: -            return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; +            return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;          default:              UNREACHABLE();              return GL_COLOR_BUFFER_BIT; @@ -933,8 +944,10 @@ void Image::Scale(bool up_scale) {              return 0;          case SurfaceType::Depth:              return 1; -        case SurfaceType::DepthStencil: +        case SurfaceType::Stencil:              return 2; +        case SurfaceType::DepthStencil: +            return 3;          default:              UNREACHABLE();              return 0; @@ -956,7 +969,7 @@ void Image::Scale(bool up_scale) {          auto dst_info = info;          dst_info.size.width = scaled_width;          dst_info.size.height = scaled_height; -        upscaled_backup = MakeImage(dst_info, gl_internal_format); +        upscaled_backup = MakeImage(dst_info, gl_internal_format, gl_num_levels);      }      const u32 src_width = up_scale ? original_width : scaled_width;      const u32 src_height = up_scale ? original_height : scaled_height; @@ -1264,10 +1277,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM      }      if (const ImageView* const image_view = depth_buffer; image_view) { -        if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { +        switch (GetFormatType(image_view->format)) { +        case SurfaceType::Depth: +            buffer_bits |= GL_DEPTH_BUFFER_BIT; +            break; +        case SurfaceType::Stencil: +            buffer_bits |= GL_STENCIL_BUFFER_BIT; +            break; +        case SurfaceType::DepthStencil:              buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; -        } else { +            break; +        default: +            UNREACHABLE();              buffer_bits |= GL_DEPTH_BUFFER_BIT; +            break;          }          const GLenum attachment = AttachmentType(image_view->format);          AttachTexture(handle, attachment, image_view); @@ -1308,7 +1331,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,          const u32 copy_size = region.width * region.height * region.depth * img_bpp;          if (pbo_size < copy_size) {              intermediate_pbo.Create(); -            pbo_size = NextPow2(copy_size); +            pbo_size = Common::NextPow2(copy_size);              glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY);          }          // Copy from source to PBO diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 578f8d523..37d5e6a6b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -84,9 +84,13 @@ public:      u64 GetDeviceLocalMemory() const; +    bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { +        return true; +    } +      void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); -    void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);      void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {          UNIMPLEMENTED(); @@ -164,8 +168,8 @@ private:      std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; -    std::array<OGLFramebuffer, 3> rescale_draw_fbos; -    std::array<OGLFramebuffer, 3> rescale_read_fbos; +    std::array<OGLFramebuffer, 4> rescale_draw_fbos; +    std::array<OGLFramebuffer, 4> rescale_read_fbos;      const Settings::ResolutionScalingInfo& resolution;  }; @@ -221,6 +225,7 @@ private:      GLenum gl_internal_format = GL_NONE;      GLenum gl_format = GL_NONE;      GLenum gl_type = GL_NONE; +    GLsizei gl_num_levels{};      TextureCacheRuntime* runtime{};      GLuint current_texture{};  }; @@ -338,7 +343,6 @@ struct TextureCacheParams {      static constexpr bool FRAMEBUFFER_BLITS = true;      static constexpr bool HAS_EMULATED_COPIES = true;      static constexpr bool HAS_DEVICE_MEMORY_INFO = true; -    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true;      using Runtime = OpenGL::TextureCacheRuntime;      using Image = OpenGL::Image; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 39158aa3e..daba42ed9 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -108,6 +108,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB      {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                // E5B9G9R9_FLOAT      {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},            // D32_FLOAT      {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},    // D16_UNORM +    {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE},                // S8_UINT      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // D24_UNORM_S8_UINT      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // S8_UINT_D24_UNORM      {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b3884a4f5..9a38b6b34 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,6 +4,8 @@  #include <algorithm> +#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"  #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"  #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"  #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" @@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,        blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),        convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),        convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), +      convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), +      convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),        linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),        nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {      if (device.IsExtShaderStencilExportSupported()) { @@ -448,6 +452,22 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,      Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);  } +void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, +                                          ImageView& src_image_view, u32 up_scale, u32 down_shift) { +    ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), +                                 convert_abgr8_to_d24s8_frag, true); +    ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, +                 down_shift); +} + +void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, +                                          ImageView& src_image_view, u32 up_scale, u32 down_shift) { +    ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), +                                 convert_d24s8_to_abgr8_frag, false); +    ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, +                        down_shift); +} +  void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,                                const ImageView& src_image_view, u32 up_scale, u32 down_shift) {      const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -495,6 +515,101 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb      scheduler.InvalidateState();  } +void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, +                                   ImageView& src_image_view, u32 up_scale, u32 down_shift) { +    const VkPipelineLayout layout = *one_texture_pipeline_layout; +    const VkImageView src_view = src_image_view.ColorView(); +    const VkSampler sampler = *nearest_sampler; +    const VkExtent2D extent{ +        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), +        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), +    }; +    scheduler.RequestRenderpass(dst_framebuffer); +    scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, +                      this](vk::CommandBuffer cmdbuf) { +        const VkOffset2D offset{ +            .x = 0, +            .y = 0, +        }; +        const VkViewport viewport{ +            .x = 0.0f, +            .y = 0.0f, +            .width = static_cast<float>(extent.width), +            .height = static_cast<float>(extent.height), +            .minDepth = 0.0f, +            .maxDepth = 0.0f, +        }; +        const VkRect2D scissor{ +            .offset = offset, +            .extent = extent, +        }; +        const PushConstants push_constants{ +            .tex_scale = {viewport.width, viewport.height}, +            .tex_offset = {0.0f, 0.0f}, +        }; +        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); +        UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + +        // TODO: Barriers +        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, +                                  nullptr); +        cmdbuf.SetViewport(0, viewport); +        cmdbuf.SetScissor(0, scissor); +        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +        cmdbuf.Draw(3, 1, 0, 0); +    }); +    scheduler.InvalidateState(); +} + +void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, +                                          ImageView& src_image_view, u32 up_scale, u32 down_shift) { +    const VkPipelineLayout layout = *two_textures_pipeline_layout; +    const VkImageView src_depth_view = src_image_view.DepthView(); +    const VkImageView src_stencil_view = src_image_view.StencilView(); +    const VkSampler sampler = *nearest_sampler; +    const VkExtent2D extent{ +        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), +        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), +    }; +    scheduler.RequestRenderpass(dst_framebuffer); +    scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale, +                      down_shift, this](vk::CommandBuffer cmdbuf) { +        const VkOffset2D offset{ +            .x = 0, +            .y = 0, +        }; +        const VkViewport viewport{ +            .x = 0.0f, +            .y = 0.0f, +            .width = static_cast<float>(extent.width), +            .height = static_cast<float>(extent.height), +            .minDepth = 0.0f, +            .maxDepth = 0.0f, +        }; +        const VkRect2D scissor{ +            .offset = offset, +            .extent = extent, +        }; +        const PushConstants push_constants{ +            .tex_scale = {viewport.width, viewport.height}, +            .tex_offset = {0.0f, 0.0f}, +        }; +        const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); +        UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, +                                       src_stencil_view); +        // TODO: Barriers +        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, +                                  nullptr); +        cmdbuf.SetViewport(0, viewport); +        cmdbuf.SetScissor(0, scissor); +        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +        cmdbuf.Draw(3, 1, 0, 0); +    }); +    scheduler.InvalidateState(); +} +  VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {      const auto it = std::ranges::find(blit_color_keys, key);      if (it != blit_color_keys.end()) { @@ -636,4 +751,44 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend      });  } +void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, +                                        vk::ShaderModule& module, bool is_target_depth, +                                        bool single_texture) { +    if (pipeline) { +        return; +    } +    const std::array stages = MakeStages(*full_screen_vert, *module); +    pipeline = device.GetLogical().CreateGraphicsPipeline({ +        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .stageCount = static_cast<u32>(stages.size()), +        .pStages = stages.data(), +        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, +        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, +        .pTessellationState = nullptr, +        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, +        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, +        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, +        .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, +        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, +        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, +        .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout, +        .renderPass = renderpass, +        .subpass = 0, +        .basePipelineHandle = VK_NULL_HANDLE, +        .basePipelineIndex = 0, +    }); +} + +void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, +                                                   vk::ShaderModule& module, bool single_texture) { +    ConvertPipelineEx(pipeline, renderpass, module, false, single_texture); +} + +void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, +                                                   vk::ShaderModule& module, bool single_texture) { +    ConvertPipelineEx(pipeline, renderpass, module, true, single_texture); +} +  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index d77f76678..b1a717090 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -56,10 +56,22 @@ public:      void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,                           u32 up_scale, u32 down_shift); +    void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, +                             u32 up_scale, u32 down_shift); + +    void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, +                             u32 up_scale, u32 down_shift); +  private:      void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,                   const ImageView& src_image_view, u32 up_scale, u32 down_shift); +    void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, +                      ImageView& src_image_view, u32 up_scale, u32 down_shift); + +    void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, +                             ImageView& src_image_view, u32 up_scale, u32 down_shift); +      [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);      [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); @@ -68,6 +80,15 @@ private:      void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); +    void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, +                           vk::ShaderModule& module, bool is_target_depth, bool single_texture); + +    void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, +                                      vk::ShaderModule& module, bool single_texture); + +    void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, +                                      vk::ShaderModule& module, bool single_texture); +      const Device& device;      VKScheduler& scheduler;      StateTracker& state_tracker; @@ -83,6 +104,8 @@ private:      vk::ShaderModule blit_depth_stencil_frag;      vk::ShaderModule convert_depth_to_float_frag;      vk::ShaderModule convert_float_to_depth_frag; +    vk::ShaderModule convert_abgr8_to_d24s8_frag; +    vk::ShaderModule convert_d24s8_to_abgr8_frag;      vk::Sampler linear_sampler;      vk::Sampler nearest_sampler; @@ -94,6 +117,8 @@ private:      vk::Pipeline convert_r32_to_d32_pipeline;      vk::Pipeline convert_d16_to_r16_pipeline;      vk::Pipeline convert_r16_to_d16_pipeline; +    vk::Pipeline convert_abgr8_to_d24s8_pipeline; +    vk::Pipeline convert_d24s8_to_abgr8_pipeline;  };  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 68a23b602..751e4792b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -162,7 +162,7 @@ struct FormatTuple {      {VK_FORMAT_UNDEFINED},                                     // R16_SINT      {VK_FORMAT_R16G16_UNORM, Attachable | Storage},            // R16G16_UNORM      {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage},           // R16G16_FLOAT -    {VK_FORMAT_UNDEFINED},                                     // R16G16_UINT +    {VK_FORMAT_R16G16_UINT, Attachable | Storage},             // R16G16_UINT      {VK_FORMAT_R16G16_SINT, Attachable | Storage},             // R16G16_SINT      {VK_FORMAT_R16G16_SNORM, Attachable | Storage},            // R16G16_SNORM      {VK_FORMAT_UNDEFINED},                                     // R32G32B32_FLOAT @@ -176,8 +176,8 @@ struct FormatTuple {      {VK_FORMAT_R32_UINT, Attachable | Storage},                // R32_UINT      {VK_FORMAT_R32_SINT, Attachable | Storage},                // R32_SINT      {VK_FORMAT_ASTC_8x8_UNORM_BLOCK},                          // ASTC_2D_8X8_UNORM -    {VK_FORMAT_UNDEFINED},                                     // ASTC_2D_8X5_UNORM -    {VK_FORMAT_UNDEFINED},                                     // ASTC_2D_5X4_UNORM +    {VK_FORMAT_ASTC_8x5_UNORM_BLOCK},                          // ASTC_2D_8X5_UNORM +    {VK_FORMAT_ASTC_5x4_UNORM_BLOCK},                          // ASTC_2D_5X4_UNORM      {VK_FORMAT_B8G8R8A8_SRGB, Attachable},                     // B8G8R8A8_SRGB      {VK_FORMAT_BC1_RGBA_SRGB_BLOCK},                           // BC1_RGBA_SRGB      {VK_FORMAT_BC2_SRGB_BLOCK},                                // BC2_SRGB @@ -208,6 +208,9 @@ struct FormatTuple {      {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT      {VK_FORMAT_D16_UNORM, Attachable},  // D16_UNORM +    // Stencil formats +    {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT +      // DepthStencil formats      {VK_FORMAT_D24_UNORM_S8_UINT, Attachable},  // D24_UNORM_S8_UINT      {VK_FORMAT_D24_UNORM_S8_UINT, Attachable},  // S8_UINT_D24_UNORM (emulated) diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 407fd2a15..197cba8e3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -8,6 +8,7 @@  #include <vector>  #include "common/bit_cast.h" +#include "common/bit_util.h"  #include "common/settings.h"  #include "video_core/engines/fermi_2d.h" @@ -102,6 +103,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {              usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;              break;          case VideoCore::Surface::SurfaceType::Depth: +        case VideoCore::Surface::SurfaceType::Stencil:          case VideoCore::Surface::SurfaceType::DepthStencil:              usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;              break; @@ -173,6 +175,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {          return VK_IMAGE_ASPECT_COLOR_BIT;      case VideoCore::Surface::SurfaceType::Depth:          return VK_IMAGE_ASPECT_DEPTH_BIT; +    case VideoCore::Surface::SurfaceType::Stencil: +        return VK_IMAGE_ASPECT_STENCIL_BIT;      case VideoCore::Surface::SurfaceType::DepthStencil:          return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;      default: @@ -195,6 +199,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {      case PixelFormat::D16_UNORM:      case PixelFormat::D32_FLOAT:          return VK_IMAGE_ASPECT_DEPTH_BIT; +    case PixelFormat::S8_UINT: +        return VK_IMAGE_ASPECT_STENCIL_BIT;      default:          return VK_IMAGE_ASPECT_COLOR_BIT;      } @@ -308,6 +314,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {      };  } +[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, +                                                    VkImageAspectFlags aspect_mask) noexcept { +    return VkBufferImageCopy{ +        .bufferOffset = 0, +        .bufferRowLength = 0, +        .bufferImageHeight = 0, +        .imageSubresource = MakeImageSubresourceLayers( +            is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), +        .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), +        .imageExtent = MakeExtent3D(copy.extent), +    }; +} +  [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(      std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {      std::vector<VkBufferCopy> result(copies.size()); @@ -754,6 +773,173 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {      return staging_buffer_pool.Request(size, MemoryUsage::Download);  } +bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { +    if (VideoCore::Surface::GetFormatType(dst.info.format) == +            VideoCore::Surface::SurfaceType::DepthStencil && +        !device.IsExtShaderStencilExportSupported()) { +        return true; +    } +    if (VideoCore::Surface::GetFormatType(src.info.format) == +            VideoCore::Surface::SurfaceType::DepthStencil && +        !device.IsExtShaderStencilExportSupported()) { +        return true; +    } +    if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT || +        src.info.format == PixelFormat::D32_FLOAT_S8_UINT) { +        return true; +    } +    return false; +} + +VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { +    const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); +    if (buffer_commits[level]) { +        return *buffers[level]; +    } +    const auto new_size = Common::NextPow2(needed_size); +    static constexpr VkBufferUsageFlags flags = +        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | +        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; +    buffers[level] = device.GetLogical().CreateBuffer({ +        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .size = new_size, +        .usage = flags, +        .sharingMode = VK_SHARING_MODE_EXCLUSIVE, +        .queueFamilyIndexCount = 0, +        .pQueueFamilyIndices = nullptr, +    }); +    buffer_commits[level] = std::make_unique<MemoryCommit>( +        memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); +    return *buffers[level]; +} + +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, +                                           std::span<const VideoCommon::ImageCopy> copies) { +    std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); +    std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); +    const VkImageAspectFlags src_aspect_mask = src.AspectMask(); +    const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); + +    std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { +        return MakeBufferImageCopy(copy, true, src_aspect_mask); +    }); +    std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { +        return MakeBufferImageCopy(copy, false, dst_aspect_mask); +    }); +    const u32 img_bpp = BytesPerBlock(src.info.format); +    size_t total_size = 0; +    for (const auto& copy : copies) { +        total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; +    } +    const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); +    const VkImage dst_image = dst.Handle(); +    const VkImage src_image = src.Handle(); +    scheduler.RequestOutsideRenderPassOperationContext(); +    scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, +                      vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { +        RangedBarrierRange dst_range; +        RangedBarrierRange src_range; +        for (const VkBufferImageCopy& copy : vk_in_copies) { +            src_range.AddLayers(copy.imageSubresource); +        } +        for (const VkBufferImageCopy& copy : vk_out_copies) { +            dst_range.AddLayers(copy.imageSubresource); +        } +        static constexpr VkMemoryBarrier READ_BARRIER{ +            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, +            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +        }; +        static constexpr VkMemoryBarrier WRITE_BARRIER{ +            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, +        }; +        const std::array pre_barriers{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = src_image, +                .subresourceRange = src_range.SubresourceRange(src_aspect_mask), +            }, +        }; +        const std::array middle_in_barrier{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = 0, +                .dstAccessMask = 0, +                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = src_image, +                .subresourceRange = src_range.SubresourceRange(src_aspect_mask), +            }, +        }; +        const std::array middle_out_barrier{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = dst_image, +                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), +            }, +        }; +        const std::array post_barriers{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = dst_image, +                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), +            }, +        }; +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, {}, {}, pre_barriers); + +        cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, +                                 vk_in_copies); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                               0, WRITE_BARRIER, nullptr, middle_in_barrier); + +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, READ_BARRIER, {}, middle_out_barrier); +        cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                               0, {}, {}, post_barriers); +    }); +} +  void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,                                      const Region2D& dst_region, const Region2D& src_region,                                      Tegra::Engines::Fermi2D::Filter filter, @@ -881,6 +1067,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im              return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);          }          break; +    case PixelFormat::A8B8G8R8_UNORM: +        if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { +            return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); +        } +        break;      case PixelFormat::R32_FLOAT:          if (src_view.format == PixelFormat::D32_FLOAT) {              return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); @@ -891,6 +1082,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im              return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);          }          break; +    case PixelFormat::S8_UINT_D24_UNORM: +        return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); +        break;      case PixelFormat::D32_FLOAT:          if (src_view.format == PixelFormat::R32_FLOAT) {              return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); @@ -1386,6 +1580,14 @@ VkImageView ImageView::StencilView() {      return *stencil_view;  } +VkImageView ImageView::ColorView() { +    if (color_view) { +        return *color_view; +    } +    color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT); +    return *color_view; +} +  VkImageView ImageView::StorageView(Shader::TextureType texture_type,                                     Shader::ImageFormat image_format) {      if (image_format == Shader::ImageFormat::Typeless) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f5f8f9a74..753e3e8a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -61,6 +61,10 @@ public:      void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +    bool ShouldReinterpret(Image& dst, Image& src); + +    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +      void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);      bool CanAccelerateImageUpload(Image&) const noexcept { @@ -82,6 +86,8 @@ public:          return true;      } +    [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); +      const Device& device;      VKScheduler& scheduler;      MemoryAllocator& memory_allocator; @@ -90,6 +96,10 @@ public:      ASTCDecoderPass& astc_decoder_pass;      RenderPassCache& render_pass_cache;      const Settings::ResolutionScalingInfo& resolution; + +    constexpr static size_t indexing_slots = 8 * sizeof(size_t); +    std::array<vk::Buffer, indexing_slots> buffers{}; +    std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};  };  class Image : public VideoCommon::ImageBase { @@ -174,6 +184,8 @@ public:      [[nodiscard]] VkImageView StencilView(); +    [[nodiscard]] VkImageView ColorView(); +      [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,                                            Shader::ImageFormat image_format); @@ -214,6 +226,7 @@ private:      std::unique_ptr<StorageViews> storage_views;      vk::ImageView depth_view;      vk::ImageView stencil_view; +    vk::ImageView color_view;      VkImage image_handle = VK_NULL_HANDLE;      VkImageView render_target = VK_NULL_HANDLE;      VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; @@ -316,7 +329,6 @@ struct TextureCacheParams {      static constexpr bool FRAMEBUFFER_BLITS = false;      static constexpr bool HAS_EMULATED_COPIES = false;      static constexpr bool HAS_DEVICE_MEMORY_INFO = true; -    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false;      using Runtime = Vulkan::TextureCacheRuntime;      using Image = Vulkan::Image; diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index dc6995b46..bcaf5f575 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept {      const int now_complete = num_complete.load(std::memory_order::relaxed);      const int now_building = num_building.load(std::memory_order::relaxed);      if (now_complete == now_building) { -        const auto now = std::chrono::high_resolution_clock::now(); +        const auto now = std::chrono::steady_clock::now();          if (completed && num_complete == num_when_completed) {              if (now - complete_time > TIME_TO_STOP_REPORTING) {                  report_base = now_complete; diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index ad363bfb5..4d8d52071 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -28,6 +28,6 @@ private:      bool completed{};      int num_when_completed{}; -    std::chrono::high_resolution_clock::time_point complete_time; +    std::chrono::steady_clock::time_point complete_time;  };  } // namespace VideoCore diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 58d262446..a36015c8c 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -82,6 +82,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {          return PixelFormat::D32_FLOAT;      case Tegra::DepthFormat::D16_UNORM:          return PixelFormat::D16_UNORM; +    case Tegra::DepthFormat::S8_UINT: +        return PixelFormat::S8_UINT;      case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:          return PixelFormat::D32_FLOAT_S8_UINT;      default: @@ -214,6 +216,11 @@ SurfaceType GetFormatType(PixelFormat pixel_format) {      }      if (static_cast<std::size_t>(pixel_format) < +        static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) { +        return SurfaceType::Stencil; +    } + +    if (static_cast<std::size_t>(pixel_format) <          static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {          return SurfaceType::DepthStencil;      } diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 2ce7c7d33..33e8d24ab 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -110,8 +110,12 @@ enum class PixelFormat {      MaxDepthFormat, +    // Stencil formats +    S8_UINT = MaxDepthFormat, +    MaxStencilFormat, +      // DepthStencil formats -    D24_UNORM_S8_UINT = MaxDepthFormat, +    D24_UNORM_S8_UINT = MaxStencilFormat,      S8_UINT_D24_UNORM,      D32_FLOAT_S8_UINT, @@ -125,8 +129,9 @@ constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max  enum class SurfaceType {      ColorTexture = 0,      Depth = 1, -    DepthStencil = 2, -    Invalid = 3, +    Stencil = 2, +    DepthStencil = 3, +    Invalid = 4,  };  enum class SurfaceTarget { @@ -229,6 +234,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{      1,  // E5B9G9R9_FLOAT      1,  // D32_FLOAT      1,  // D16_UNORM +    1,  // S8_UINT      1,  // D24_UNORM_S8_UINT      1,  // S8_UINT_D24_UNORM      1,  // D32_FLOAT_S8_UINT @@ -328,6 +334,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{      1,  // E5B9G9R9_FLOAT      1,  // D32_FLOAT      1,  // D16_UNORM +    1,  // S8_UINT      1,  // D24_UNORM_S8_UINT      1,  // S8_UINT_D24_UNORM      1,  // D32_FLOAT_S8_UINT @@ -427,6 +434,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{      32,  // E5B9G9R9_FLOAT      32,  // D32_FLOAT      16,  // D16_UNORM +    8,   // S8_UINT      32,  // D24_UNORM_S8_UINT      32,  // S8_UINT_D24_UNORM      64,  // D32_FLOAT_S8_UINT diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index ddfb726fe..afa807d5d 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -139,6 +139,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,          return PixelFormat::D16_UNORM;      case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):          return PixelFormat::S8_UINT_D24_UNORM; +    case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR): +        return PixelFormat::S8_UINT_D24_UNORM;      case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):          return PixelFormat::S8_UINT_D24_UNORM;      case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index c6cf0583f..b2c81057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -194,6 +194,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str                  return "D32_FLOAT";              case PixelFormat::D16_UNORM:                  return "D16_UNORM"; +            case PixelFormat::S8_UINT: +                return "S8_UINT";              case PixelFormat::D24_UNORM_S8_UINT:                  return "D24_UNORM_S8_UINT";              case PixelFormat::S8_UINT_D24_UNORM: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 241f71a91..5aaeb16ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -472,9 +472,10 @@ template <class P>  void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,                                  const Tegra::Engines::Fermi2D::Surface& src,                                  const Tegra::Engines::Fermi2D::Config& copy) { -    const BlitImages images = GetBlitImages(dst, src); +    const BlitImages images = GetBlitImages(dst, src, copy);      const ImageId dst_id = images.dst_id;      const ImageId src_id = images.src_id; +      PrepareImage(src_id, false, false);      PrepareImage(dst_id, true, false); @@ -758,14 +759,18 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,              return ImageId{};          }      } -    const bool broken_views = runtime.HasBrokenTextureViewFormats(); +    const bool broken_views = +        runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews);      const bool native_bgr = runtime.HasNativeBgr(); -    ImageId image_id; +    const bool flexible_formats = True(options & RelaxedOptions::Format); +    ImageId image_id{}; +    boost::container::small_vector<ImageId, 1> image_ids;      const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {          if (True(existing_image.flags & ImageFlagBits::Remapped)) {              return false;          } -        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { +        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) +            [[unlikely]] {              const bool strict_size = False(options & RelaxedOptions::Size) &&                                       True(existing_image.flags & ImageFlagBits::Strong);              const ImageInfo& existing = existing_image.info; @@ -774,17 +779,27 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,                  IsPitchLinearSameSize(existing, info, strict_size) &&                  IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {                  image_id = existing_image_id; -                return true; +                image_ids.push_back(existing_image_id); +                return !flexible_formats && existing.format == info.format;              }          } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,                                   native_bgr)) {              image_id = existing_image_id; -            return true; +            image_ids.push_back(existing_image_id); +            return !flexible_formats && existing_image.info.format == info.format;          }          return false;      };      ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); -    return image_id; +    if (image_ids.size() <= 1) [[likely]] { +        return image_id; +    } +    auto image_ids_compare = [this](ImageId a, ImageId b) { +        auto& image_a = slot_images[a]; +        auto& image_b = slot_images[b]; +        return image_a.modification_tick < image_b.modification_tick; +    }; +    return *std::ranges::max_element(image_ids, image_ids_compare);  }  template <class P> @@ -1076,31 +1091,66 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA  template <class P>  typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( -    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { -    static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; +    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, +    const Tegra::Engines::Fermi2D::Config& copy) { + +    static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples;      const GPUVAddr dst_addr = dst.Address();      const GPUVAddr src_addr = src.Address();      ImageInfo dst_info(dst);      ImageInfo src_info(src); +    const bool can_be_depth_blit = +        dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point;      ImageId dst_id;      ImageId src_id; +    RelaxedOptions try_options = FIND_OPTIONS; +    if (can_be_depth_blit) { +        try_options |= RelaxedOptions::Format; +    }      do {          has_deleted_images = false; -        dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); -        src_id = FindImage(src_info, src_addr, FIND_OPTIONS); -        const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; +        src_id = FindImage(src_info, src_addr, try_options); +        dst_id = FindImage(dst_info, dst_addr, try_options);          const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; -        DeduceBlitImages(dst_info, src_info, dst_image, src_image); -        if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { -            continue; +        if (src_image && src_image->info.num_samples > 1) { +            RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; +            src_id = FindOrInsertImage(src_info, src_addr, find_options); +            dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); +            if (has_deleted_images) { +                continue; +            } +            break;          } -        if (!dst_id) { -            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); +        if (can_be_depth_blit) { +            const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; +            DeduceBlitImages(dst_info, src_info, dst_image, src_image); +            if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { +                continue; +            }          }          if (!src_id) {              src_id = InsertImage(src_info, src_addr, RelaxedOptions{});          } +        if (!dst_id) { +            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); +        }      } while (has_deleted_images); +    const ImageBase& src_image = slot_images[src_id]; +    const ImageBase& dst_image = slot_images[dst_id]; +    const bool native_bgr = runtime.HasNativeBgr(); +    if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) || +        GetFormatType(src_info.format) != GetFormatType(src_image.info.format) || +        !VideoCore::Surface::IsViewCompatible(dst_info.format, dst_image.info.format, false, +                                              native_bgr) || +        !VideoCore::Surface::IsViewCompatible(src_info.format, src_image.info.format, false, +                                              native_bgr)) { +        // Make sure the images match the expected format. +        do { +            has_deleted_images = false; +            src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); +            dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); +        } while (has_deleted_images); +    }      return BlitImages{          .dst_id = dst_id,          .src_id = src_id, @@ -1157,7 +1207,14 @@ template <class P>  ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,                                                    bool is_clear) {      const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; -    const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); +    ImageId image_id{}; +    bool delete_state = has_deleted_images; +    do { +        has_deleted_images = false; +        image_id = FindOrInsertImage(info, gpu_addr, options); +        delete_state |= has_deleted_images; +    } while (has_deleted_images); +    has_deleted_images = delete_state;      if (!image_id) {          return NULL_IMAGE_VIEW_ID;      } @@ -1759,8 +1816,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag      }      UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);      UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); -    if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { -        return runtime.ConvertImage(dst, src, copies); +    if (runtime.ShouldReinterpret(dst, src)) { +        return runtime.ReinterpretImage(dst, src, copies);      }      for (const ImageCopy& copy : copies) {          UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); @@ -1780,7 +1837,13 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag          const SubresourceExtent src_extent{.levels = 1, .layers = 1};          const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};          const SubresourceRange src_range{.base = src_base, .extent = src_extent}; -        const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); +        PixelFormat dst_format = dst.info.format; +        if (GetFormatType(src.info.format) == SurfaceType::DepthStencil && +            GetFormatType(dst_format) == SurfaceType::ColorTexture && +            BytesPerBlock(dst_format) == 4) { +            dst_format = PixelFormat::A8B8G8R8_UNORM; +        } +        const ImageViewInfo dst_view_info(ImageViewType::e2D, dst_format, dst_range);          const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);          const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);          Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a9504c0e8..7107887a6 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -59,8 +59,6 @@ class TextureCache {      static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;      /// True when the API can provide info about the memory of the device.      static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; -    /// True when the API provides utilities for pixel format conversions. -    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS;      static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;      static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; @@ -254,7 +252,8 @@ private:      /// Return a blit image pair from the given guest blit parameters      [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, -                                           const Tegra::Engines::Fermi2D::Surface& src); +                                           const Tegra::Engines::Fermi2D::Surface& src, +                                           const Tegra::Engines::Fermi2D::Config& copy);      /// Find or create a sampler from a guest descriptor sampler      [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 5c274abdf..5ac27b3a7 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 {      Size = 1 << 0,      Format = 1 << 1,      Samples = 1 << 2, +    ForceBrokenViews = 1 << 3,  };  DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index ddc9fb13a..7bd31b211 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1151,6 +1151,7 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr  void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,                        const ImageBase* src) { +    const auto original_dst_format = dst_info.format;      if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {          src_info.format = src->info.format;      } @@ -1161,7 +1162,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase*          dst_info.format = src->info.format;      }      if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { -        src_info.format = dst->info.format; +        if (src) { +            if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { +                dst_info.format = original_dst_format; +            } +        } else { +            src_info.format = dst->info.format; +        }      }  } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index e852c817e..329bf4def 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -55,10 +55,4 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor      }  } -float GetResolutionScaleFactor(const RendererBase& renderer) { -    return Settings::values.resolution_info.active -               ? Settings::values.resolution_info.up_factor -               : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio(); -} -  } // namespace VideoCore diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f86877e86..084df641f 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -25,6 +25,4 @@ class RendererBase;  /// Creates an emulated GPU instance using the given system context.  std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); -float GetResolutionScaleFactor(const RendererBase& renderer); -  } // namespace VideoCore diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 95106f88f..7bf5b6578 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -21,6 +21,13 @@  namespace Vulkan {  namespace {  namespace Alternatives { +constexpr std::array STENCIL8_UINT{ +    VK_FORMAT_D16_UNORM_S8_UINT, +    VK_FORMAT_D24_UNORM_S8_UINT, +    VK_FORMAT_D32_SFLOAT_S8_UINT, +    VK_FORMAT_UNDEFINED, +}; +  constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{      VK_FORMAT_D32_SFLOAT_S8_UINT,      VK_FORMAT_D16_UNORM_S8_UINT, @@ -74,6 +81,8 @@ void SetNext(void**& next, T& data) {  constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {      switch (format) { +    case VK_FORMAT_S8_UINT: +        return Alternatives::STENCIL8_UINT.data();      case VK_FORMAT_D24_UNORM_S8_UINT:          return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();      case VK_FORMAT_D16_UNORM_S8_UINT: @@ -121,6 +130,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica          VK_FORMAT_R16G16_UNORM,          VK_FORMAT_R16G16_SNORM,          VK_FORMAT_R16G16_SFLOAT, +        VK_FORMAT_R16G16_UINT,          VK_FORMAT_R16G16_SINT,          VK_FORMAT_R16_UNORM,          VK_FORMAT_R16_SNORM, @@ -145,6 +155,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica          VK_FORMAT_R4G4B4A4_UNORM_PACK16,          VK_FORMAT_D32_SFLOAT,          VK_FORMAT_D16_UNORM, +        VK_FORMAT_S8_UINT,          VK_FORMAT_D16_UNORM_S8_UINT,          VK_FORMAT_D24_UNORM_S8_UINT,          VK_FORMAT_D32_SFLOAT_S8_UINT,  | 
