diff options
| author | bunnei <bunneidev@gmail.com> | 2022-09-19 12:17:51 -0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-09-19 12:17:51 -0700 | 
| commit | 8d4458ef24e473e57b9931d7a9d1442b51fb0b1a (patch) | |
| tree | eacbdc37ea2879612238e15cf3c47c82f297d756 | |
| parent | 3a5f9409c897f12e69cfc1d395e743b4850330ec (diff) | |
| parent | 809126c94a0ed8e7964d5a550abf7b3731d00512 (diff) | |
Merge pull request #8849 from Morph1984/parallel-astc
astc: Enable parallel CPU astc decoding
| -rw-r--r-- | src/video_core/textures/astc.cpp | 56 | 
1 file changed, 35 insertions, 21 deletions
```diff
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index e3f3d3c5d..b159494c5 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -13,7 +13,9 @@
 
 #include <boost/container/static_vector.hpp>
 
+#include "common/alignment.h"
 #include "common/common_types.h"
+#include "common/thread_worker.h"
 #include "video_core/textures/astc.h"
 
 class InputBitStream {
@@ -1650,29 +1652,41 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
 
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
-    u32 block_index = 0;
-    std::size_t depth_offset = 0;
-    for (u32 z = 0; z < depth; z++) {
-        for (u32 y = 0; y < height; y += block_height) {
-            for (u32 x = 0; x < width; x += block_width) {
-                const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
-
-                // Blocks can be at most 12x12
-                std::array<u32, 12 * 12> uncompData;
-                DecompressBlock(blockPtr, block_width, block_height, uncompData);
-
-                u32 decompWidth = std::min(block_width, width - x);
-                u32 decompHeight = std::min(block_height, height - y);
-
-                const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
-                for (u32 jj = 0; jj < decompHeight; jj++) {
-                    std::memcpy(outRow.data() + jj * width * 4,
-                                uncompData.data() + jj * block_width, decompWidth * 4);
+    const u32 rows = Common::DivideUp(height, block_height);
+    const u32 cols = Common::DivideUp(width, block_width);
+
+    Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
+                                 "yuzu:ASTCDecompress"};
+
+    for (u32 z = 0; z < depth; ++z) {
+        const u32 depth_offset = z * height * width * 4;
+        for (u32 y_index = 0; y_index < rows; ++y_index) {
+            auto decompress_stride = [data, width, height, depth, block_width, block_height, output,
+                                      rows, cols, z, depth_offset, y_index] {
+                const u32 y = y_index * block_height;
+                for (u32 x_index = 0; x_index < cols; ++x_index) {
+                    const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index;
+                    const u32 x = x_index * block_width;
+
+                    const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
+
+                    // Blocks can be at most 12x12
+                    std::array<u32, 12 * 12> uncompData;
+                    DecompressBlock(blockPtr, block_width, block_height, uncompData);
+
+                    u32 decompWidth = std::min(block_width, width - x);
+                    u32 decompHeight = std::min(block_height, height - y);
+
+                    const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
+                    for (u32 h = 0; h < decompHeight; ++h) {
+                        std::memcpy(outRow.data() + h * width * 4,
+                                    uncompData.data() + h * block_width, decompWidth * 4);
+                    }
                 }
-                ++block_index;
-            }
+            };
+            workers.QueueWork(std::move(decompress_stride));
         }
-        depth_offset += height * width * 4;
+        workers.WaitForRequests();
     }
 }
 
```
