diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 174 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 11 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 2 | 
6 files changed, 193 insertions, 9 deletions
| diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6956535e5..e70bbec81 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -526,8 +526,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,      }  } -void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, -                                       std::span<const VideoCommon::ImageCopy> copies) { +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, +                                           std::span<const VideoCommon::ImageCopy> copies) {      LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);      format_conversion_pass.ConvertImage(dst, src, copies);  } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 578f8d523..ad5157d66 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -84,9 +84,13 @@ public:      u64 GetDeviceLocalMemory() const; +    bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { +        return true; +    } +      void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); -    void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);      void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {          UNIMPLEMENTED(); @@ -338,7 +342,6 @@ struct TextureCacheParams {      static constexpr bool FRAMEBUFFER_BLITS = true;      static constexpr bool HAS_EMULATED_COPIES = true;      static constexpr bool HAS_DEVICE_MEMORY_INFO = true; -    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true;      using Runtime = OpenGL::TextureCacheRuntime;      using Image = OpenGL::Image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index af1a11059..02215cfc2 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -308,6 +308,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {      };  } +[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, +                                                    VkImageAspectFlags aspect_mask) noexcept { +    return VkBufferImageCopy{ +        .bufferOffset = 0, +        .bufferRowLength = 0, +        .bufferImageHeight = 0, +        .imageSubresource = MakeImageSubresourceLayers( +            is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), +        .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), +        .imageExtent = MakeExtent3D(copy.extent), +    }; +} +  [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(      std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {      std::vector<VkBufferCopy> result(copies.size()); @@ -754,6 +767,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {      return staging_buffer_pool.Request(size, MemoryUsage::Download);  } +bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { +    if (VideoCore::Surface::GetFormatType(dst.info.format) == +        VideoCore::Surface::SurfaceType::DepthStencil) { +        return !device.IsExtShaderStencilExportSupported(); +    } +    return false; +} + +[[nodiscard]] size_t NextPow2(size_t value) { +    return static_cast<size_t>(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U))); +} + +VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { +    const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); +    if (buffer_commits[level]) { +        return *buffers[level]; +    } +    const auto new_size = NextPow2(needed_size); +    VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | +                               VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | +                               VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; +    buffers[level] = device.GetLogical().CreateBuffer({ +        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, +        .pNext = nullptr, +        .flags = 0, +        .size = new_size, +        .usage = flags, +        .sharingMode = VK_SHARING_MODE_EXCLUSIVE, +        .queueFamilyIndexCount = 0, +        .pQueueFamilyIndices = nullptr, +    }); +    buffer_commits[level] = std::make_unique<MemoryCommit>( +        memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); +    return *buffers[level]; +} + +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, +                                           std::span<const VideoCommon::ImageCopy> copies) { +    std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); +    std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); +    const VkImageAspectFlags src_aspect_mask = src.AspectMask(); +    const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); + +    std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { +        return MakeBufferImageCopy(copy, true, src_aspect_mask); +    }); +    std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { +        return MakeBufferImageCopy(copy, false, dst_aspect_mask); +    }); +    const u32 img_bpp = BytesPerBlock(src.info.format); +    size_t total_size = 0; +    for (const auto& copy : copies) { +        total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; +    } +    const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); +    const VkImage dst_image = dst.Handle(); +    const VkImage src_image = src.Handle(); +    scheduler.RequestOutsideRenderPassOperationContext(); +    scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, +                      vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { +        RangedBarrierRange dst_range; +        RangedBarrierRange src_range; +        for (const VkBufferImageCopy& copy : vk_in_copies) { +            src_range.AddLayers(copy.imageSubresource); +        } +        for (const VkBufferImageCopy& copy : vk_out_copies) { +            dst_range.AddLayers(copy.imageSubresource); +        } +        static constexpr VkMemoryBarrier READ_BARRIER{ +            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, +            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +        }; +        static constexpr VkMemoryBarrier WRITE_BARRIER{ +            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, +            .pNext = nullptr, +            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, +        }; +        const std::array pre_barriers{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = src_image, +                .subresourceRange = src_range.SubresourceRange(src_aspect_mask), +            }, +        }; +        const std::array middle_in_barrier{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = 0, +                .dstAccessMask = 0, +                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = src_image, +                .subresourceRange = src_range.SubresourceRange(src_aspect_mask), +            }, +        }; +        const std::array middle_out_barrier{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_GENERAL, +                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = dst_image, +                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), +            }, +        }; +        const std::array post_barriers{ +            VkImageMemoryBarrier{ +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +                .pNext = nullptr, +                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, +                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | +                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | +                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, +                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +                .newLayout = VK_IMAGE_LAYOUT_GENERAL, +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +                .image = dst_image, +                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), +            }, +        }; +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, {}, {}, pre_barriers); + +        cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, +                                 vk_in_copies); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                               0, WRITE_BARRIER, nullptr, middle_in_barrier); + +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, +                               0, READ_BARRIER, {}, middle_out_barrier); +        cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies); +        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                               0, {}, {}, post_barriers); +    }); +} +  void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,                                      const Region2D& dst_region, const Region2D& src_region,                                      Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f5f8f9a74..44e9dcee4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -61,6 +61,10 @@ public:      void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +    bool ShouldReinterpret(Image& dst, Image& src); + +    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +      void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);      bool CanAccelerateImageUpload(Image&) const noexcept { @@ -82,6 +86,8 @@ public:          return true;      } +    [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); +      const Device& device;      VKScheduler& scheduler;      MemoryAllocator& memory_allocator; @@ -90,6 +96,10 @@ public:      ASTCDecoderPass& astc_decoder_pass;      RenderPassCache& render_pass_cache;      const Settings::ResolutionScalingInfo& resolution; + +    constexpr static size_t indexing_slots = 8 * sizeof(size_t); +    std::array<vk::Buffer, indexing_slots> buffers{}; +    std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};  };  class Image : public VideoCommon::ImageBase { @@ -316,7 +326,6 @@ struct TextureCacheParams {      static constexpr bool FRAMEBUFFER_BLITS = false;      static constexpr bool HAS_EMULATED_COPIES = false;      static constexpr bool HAS_DEVICE_MEMORY_INFO = true; -    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false;      using Runtime = Vulkan::TextureCacheRuntime;      using Image = Vulkan::Image; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4188f93c5..44a0d42ba 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1762,8 +1762,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag      }      UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);      UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); -    if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { -        return runtime.ConvertImage(dst, src, copies); +    if (runtime.ShouldReinterpret(dst, src)) { +        return runtime.ReinterpretImage(dst, src, copies);      }      for (const ImageCopy& copy : copies) {          UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a9504c0e8..643ad811c 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -59,8 +59,6 @@ class TextureCache {      static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;      /// True when the API can provide info about the memory of the device.      static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; -    /// True when the API provides utilities for pixel format conversions. -    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS;      static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;      static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 
