diff options
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/host_shaders/convert_msaa_to_non_msaa.comp | 30 | ||||
| -rw-r--r-- | src/video_core/host_shaders/convert_non_msaa_to_msaa.comp | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/formatter.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 14 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 5 | 
12 files changed, 136 insertions, 14 deletions
| diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 52cd5bb81..2442c3c29 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -22,6 +22,8 @@ set(SHADER_FILES      convert_d24s8_to_abgr8.frag      convert_depth_to_float.frag      convert_float_to_depth.frag +    convert_msaa_to_non_msaa.comp +    convert_non_msaa_to_msaa.comp      convert_s8d24_to_abgr8.frag      full_screen_triangle.vert      fxaa.frag diff --git a/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp new file mode 100644 index 000000000..fc3854d18 --- /dev/null +++ b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0, rgba8) uniform readonly restrict image2DMSArray msaa_in; +layout (binding = 1, rgba8) uniform writeonly restrict image2DArray output_img; + +void main() { +    const ivec3 coords = ivec3(gl_GlobalInvocationID); +    if (any(greaterThanEqual(coords, imageSize(msaa_in)))) { +        return; +    } + +    // TODO: Specialization constants for num_samples? +    const int num_samples = imageSamples(msaa_in); +    for (int curr_sample = 0; curr_sample < num_samples; ++curr_sample) { +        const vec4 pixel = imageLoad(msaa_in, coords, curr_sample); + +        const int single_sample_x = 2 * coords.x + (curr_sample & 1); +        const int single_sample_y = 2 * coords.y + ((curr_sample / 2) & 1); +        const ivec3 dest_coords = ivec3(single_sample_x, single_sample_y, coords.z); + +        if (any(greaterThanEqual(dest_coords, imageSize(output_img)))) { +            continue; +        } +        imageStore(output_img, dest_coords, pixel); +    } +} diff --git a/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp new file mode 100644 index 000000000..dedd962f1 --- /dev/null +++ b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0, rgba8) uniform readonly restrict image2DArray img_in; +layout (binding = 1, rgba8) uniform writeonly restrict image2DMSArray output_msaa; + +void main() { +    const ivec3 coords = ivec3(gl_GlobalInvocationID); +    if (any(greaterThanEqual(coords, imageSize(output_msaa)))) { +        return; +    } + +    // TODO: Specialization constants for num_samples? +    const int num_samples = imageSamples(output_msaa); +    for (int curr_sample = 0; curr_sample < num_samples; ++curr_sample) { +        const int single_sample_x = 2 * coords.x + (curr_sample & 1); +        const int single_sample_y = 2 * coords.y + ((curr_sample / 2) & 1); +        const ivec3 single_coords = ivec3(single_sample_x, single_sample_y, coords.z); + +        if (any(greaterThanEqual(single_coords, imageSize(img_in)))) { +            continue; +        } +        const vec4 pixel = imageLoad(img_in, single_coords); +        imageStore(output_msaa, coords, curr_sample, pixel); +    } +} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9f7ce7414..eb6e43a08 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -557,6 +557,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,      }  } +void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image, +                                        std::span<const VideoCommon::ImageCopy> copies) { +    LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples, +              dst_image.info.num_samples); +    // TODO: Leverage the format conversion pass if possible/accurate. +    util_shaders.CopyMSAA(dst_image, src_image, copies); +} +  void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,                                             std::span<const VideoCommon::ImageCopy> copies) {      LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 5d9d370f2..e30875496 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -93,12 +93,19 @@ public:          return device.CanReportMemoryUsage();      } -    bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { +    bool ShouldReinterpret([[maybe_unused]] Image& dst, +                           [[maybe_unused]] Image& src) const noexcept { +        return true; +    } + +    bool CanUploadMSAA() const noexcept {          return true;      }      void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +    void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +      void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);      void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 404def62e..2c7ac210b 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -12,6 +12,8 @@  #include "video_core/host_shaders/astc_decoder_comp.h"  #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"  #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h" +#include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h"  #include "video_core/host_shaders/opengl_convert_s8d24_comp.h"  #include "video_core/host_shaders/opengl_copy_bc4_comp.h"  #include "video_core/host_shaders/pitch_unswizzle_comp.h" @@ -51,7 +53,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)        block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),        pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),        copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)), -      convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) { +      convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)), +      convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)), +      convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) {      const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();      swizzle_table_buffer.Create();      glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); @@ -269,6 +273,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copi      program_manager.RestoreGuestCompute();  } +void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image, +                           std::span<const VideoCommon::ImageCopy> copies) { +    const bool is_ms_to_non_ms = src_image.info.num_samples > 1 && dst_image.info.num_samples == 1; +    const auto program_handle = +        is_ms_to_non_ms ? convert_ms_to_nonms_program.handle : convert_nonms_to_ms_program.handle; +    program_manager.BindComputeProgram(program_handle); + +    for (const ImageCopy& copy : copies) { +        ASSERT(copy.src_subresource.base_layer == 0); +        ASSERT(copy.src_subresource.num_layers == 1); +        ASSERT(copy.dst_subresource.base_layer == 0); +        ASSERT(copy.dst_subresource.num_layers == 1); + +        glBindImageTexture(0, src_image.StorageHandle(), copy.src_subresource.base_level, GL_TRUE, +                           0, GL_READ_ONLY, GL_RGBA8); +        glBindImageTexture(1, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_TRUE, +                           0, GL_WRITE_ONLY, GL_RGBA8); + +        const u32 num_dispatches_x = Common::DivCeil(copy.extent.width, 8U); +        const u32 num_dispatches_y = Common::DivCeil(copy.extent.height, 8U); +        const u32 num_dispatches_z = copy.extent.depth; + +        glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); +    } +    program_manager.RestoreGuestCompute(); +} +  GLenum StoreFormat(u32 bytes_per_block) {      switch (bytes_per_block) {      case 1: diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 44efb6ecf..9013808e7 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -40,6 +40,9 @@ public:      void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies); +    void CopyMSAA(Image& dst_image, Image& src_image, +                  std::span<const VideoCommon::ImageCopy> copies); +  private:      ProgramManager& program_manager; @@ -51,6 +54,8 @@ private:      OGLProgram pitch_unswizzle_program;      OGLProgram copy_bc4_program;      OGLProgram convert_s8d24_program; +    OGLProgram convert_ms_to_nonms_program; +    OGLProgram convert_nonms_to_ms_program;  };  GLenum StoreFormat(u32 bytes_per_block); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d39372ec4..9b85dfb5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,      });  } +void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, +                                        std::span<const VideoCommon::ImageCopy> copies) { +    UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan."); +} +  u64 TextureCacheRuntime::GetDeviceLocalMemory() const {      return device.GetDeviceLocalMemory();  } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 1f27a3589..b9ee83de7 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -70,6 +70,8 @@ public:      void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +    void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); +      bool ShouldReinterpret(Image& dst, Image& src);      void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); @@ -80,6 +82,11 @@ public:          return false;      } +    bool CanUploadMSAA() const noexcept { +        // TODO: Implement buffer to MSAA uploads +        return false; +    } +      void AccelerateImageUpload(Image&, const StagingBufferRef&,                                 std::span<const VideoCommon::SwizzleParameters>); diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 418890126..30f72361d 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -22,6 +22,9 @@ std::string Name(const ImageBase& image) {      const u32 num_layers = image.info.resources.layers;      const u32 num_levels = image.info.resources.levels;      std::string resource; +    if (image.info.num_samples > 1) { +        resource += fmt::format(":{}xMSAA", image.info.num_samples); +    }      if (num_layers > 1) {          resource += fmt::format(":L{}", num_layers);      } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b01990a4..3e2cbb0b0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -773,7 +773,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {      image.flags &= ~ImageFlagBits::CpuModified;      TrackImage(image, image_id); -    if (image.info.num_samples > 1) { +    if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {          LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");          return;      } @@ -1167,14 +1167,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA          if (True(overlap.flags & ImageFlagBits::GpuModified)) {              new_image.flags |= ImageFlagBits::GpuModified;          } +        const auto& resolution = Settings::values.resolution_info; +        const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); +        const u32 up_scale = can_rescale ? resolution.up_scale : 1; +        const u32 down_shift = can_rescale ? resolution.down_shift : 0; +        auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);          if (overlap.info.num_samples != new_image.info.num_samples) { -            LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); +            runtime.CopyImageMSAA(new_image, overlap, std::move(copies));          } else { -            const auto& resolution = Settings::values.resolution_info; -            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); -            const u32 up_scale = can_rescale ? resolution.up_scale : 1; -            const u32 down_shift = can_rescale ? resolution.down_shift : 0; -            auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);              runtime.CopyImage(new_image, overlap, std::move(copies));          }          if (True(overlap.flags & ImageFlagBits::Tracked)) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 03acc68d9..697f86641 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -573,10 +573,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {      if (info.type == ImageType::Buffer) {          return info.size.width * BytesPerBlock(info.format);      } -    if (info.num_samples > 1) { -        // Multisample images can't be uploaded or downloaded to the host -        return 0; -    }      if (info.type == ImageType::Linear) {          return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));      } @@ -703,7 +699,6 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {  std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,                                               SubresourceBase base, u32 up_scale, u32 down_shift) {      ASSERT(dst.resources.levels >= src.resources.levels); -    ASSERT(dst.num_samples == src.num_samples);      const bool is_dst_3d = dst.type == ImageType::e3D;      if (is_dst_3d) { | 
