diff options
| -rw-r--r-- | src/video_core/texture_cache/decode_bc.cpp | 50 | ||||
| -rw-r--r-- | src/video_core/texture_cache/decode_bc.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 16 | 
3 files changed, 35 insertions, 33 deletions
| diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp index 3e26474a3..a018c6df4 100644 --- a/src/video_core/texture_cache/decode_bc.cpp +++ b/src/video_core/texture_cache/decode_bc.cpp @@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {  }  template <auto decompress, PixelFormat pixel_format> -void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,                        bool is_signed = false) {      const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); -    const u32 block_width = std::min(extent.width, BLOCK_SIZE); -    const u32 block_height = std::min(extent.height, BLOCK_SIZE); -    const u32 pitch = extent.width * out_bpp; +    const u32 block_size = BlockSize(pixel_format); +    const u32 width = copy.image_extent.width; +    const u32 height = copy.image_extent.height * copy.image_subresource.num_layers; +    const u32 depth = copy.image_extent.depth; +    const u32 block_width = std::min(width, BLOCK_SIZE); +    const u32 block_height = std::min(height, BLOCK_SIZE); +    const u32 pitch = width * out_bpp;      size_t input_offset = 0;      size_t output_offset = 0; -    for (u32 slice = 0; slice < extent.depth; ++slice) { -        for (u32 y = 0; y < extent.height; y += block_height) { -            size_t row_offset = 0; -            for (u32 x = 0; x < extent.width; -                 x += block_width, row_offset += block_width * out_bpp) { -                const u8* src = input.data() + input_offset; -                u8* const dst = output.data() + output_offset + row_offset; +    for (u32 slice = 0; slice < depth; ++slice) { +        for (u32 y = 0; y < height; y += block_height) { +            size_t src_offset = input_offset; +            size_t dst_offset = output_offset; +            for (u32 x = 0; x < width; x += block_width) { +                const u8* src = input.data() + src_offset; +                u8* const dst = output.data() + dst_offset;                  if constexpr (IsSigned(pixel_format)) { -                    decompress(src, dst, x, y, extent.width, extent.height, is_signed); +                    decompress(src, dst, x, y, width, height, is_signed);                  } else { -                    decompress(src, dst, x, y, extent.width, extent.height); +                    decompress(src, dst, x, y, width, height);                  } -                input_offset += BlockSize(pixel_format); +                src_offset += block_size; +                dst_offset += block_width * out_bpp;              } +            input_offset += copy.buffer_row_length * block_size / block_width;              output_offset += block_height * pitch;          }      }  } -void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,                     VideoCore::Surface::PixelFormat pixel_format) {      switch (pixel_format) {      case PixelFormat::BC1_RGBA_UNORM:      case PixelFormat::BC1_RGBA_SRGB: -        DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); +        DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy);          break;      case PixelFormat::BC2_UNORM:      case PixelFormat::BC2_SRGB: -        DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); +        DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy);          break;      case PixelFormat::BC3_UNORM:      case PixelFormat::BC3_SRGB: -        DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); +        DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy);          break;      case PixelFormat::BC4_SNORM:      case PixelFormat::BC4_UNORM:          DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( -            input, output, extent, pixel_format == PixelFormat::BC4_SNORM); +            input, output, copy, pixel_format == PixelFormat::BC4_SNORM);          break;      case PixelFormat::BC5_SNORM:      case PixelFormat::BC5_UNORM:          DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( -            input, output, extent, pixel_format == PixelFormat::BC5_SNORM); +            input, output, copy, pixel_format == PixelFormat::BC5_SNORM);          break;      case PixelFormat::BC6H_SFLOAT:      case PixelFormat::BC6H_UFLOAT:          DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( -            input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); +            input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT);          break;      case PixelFormat::BC7_SRGB:      case PixelFormat::BC7_UNORM: -        DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); +        DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy);          break;      default:          LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h index 41d1ec0a3..4e3b9b8ac 100644 --- a/src/video_core/texture_cache/decode_bc.h +++ b/src/video_core/texture_cache/decode_bc.h @@ -13,7 +13,7 @@ namespace VideoCommon {  [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); -void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,                     VideoCore::Surface::PixelFormat pixel_format);  } // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 15596c925..fcf70068e 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory                                                                     std::span<u8> output) {      const size_t guest_size_bytes = input.size_bytes();      const u32 bpp_log2 = BytesPerBlockLog2(info.format); +    const Extent2D tile_size = DefaultBlockSize(info.format);      const Extent3D size = info.size;      if (info.type == ImageType::Linear) { @@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory          return {{              .buffer_offset = 0,              .buffer_size = guest_size_bytes, -            .buffer_row_length = info.pitch >> bpp_log2, +            .buffer_row_length = info.pitch * tile_size.width >> bpp_log2,              .buffer_image_height = size.height,              .image_subresource =                  { @@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory      const LevelInfo level_info = MakeLevelInfo(info);      const s32 num_layers = info.resources.layers;      const s32 num_levels = info.resources.levels; -    const Extent2D tile_size = DefaultBlockSize(info.format);      const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);      const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);      const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels); @@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8          const auto input_offset = input.subspan(copy.buffer_offset);          copy.buffer_offset = output_offset; -        copy.buffer_row_length = mip_size.width; -        copy.buffer_image_height = mip_size.height;          const auto recompression_setting = Settings::values.astc_recompression.GetValue();          const bool astc = IsPixelFormatASTC(info.format); @@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8                  bpp_div;              output_offset += static_cast<u32>(copy.buffer_size);          } else { -            const Extent3D image_extent{ -                .width = copy.image_extent.width, -                .height = copy.image_extent.height * copy.image_subresource.num_layers, -                .depth = copy.image_extent.depth, -            }; -            DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); +            DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format);              output_offset += copy.image_extent.width * copy.image_extent.height *                               copy.image_subresource.num_layers *                               ConvertedBytesPerBlock(info.format);          } + +        copy.buffer_row_length = mip_size.width; +        copy.buffer_image_height = mip_size.height;      }  } | 
