diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 115 | ||||
| -rw-r--r-- | src/video_core/textures/astc.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 2 | ||||
| -rw-r--r-- | src/video_core/textures/convert.cpp | 92 | ||||
| -rw-r--r-- | src/video_core/textures/convert.h | 18 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 2 | 
7 files changed, 175 insertions, 136 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6036d6ed3..b5a327936 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -94,6 +94,8 @@ add_library(video_core STATIC      surface.h      textures/astc.cpp      textures/astc.h +    textures/convert.cpp +    textures/convert.h      textures/decoders.cpp      textures/decoders.h      textures/texture.h diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index e6d47ce41..642ccb269 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -20,7 +20,7 @@  #include "video_core/renderer_opengl/gl_rasterizer_cache.h"  #include "video_core/renderer_opengl/utils.h"  #include "video_core/surface.h" -#include "video_core/textures/astc.h" +#include "video_core/textures/convert.h"  #include "video_core/textures/decoders.h"  namespace OpenGL { @@ -594,103 +594,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)      }  } -static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { -    union S8Z24 { -        BitField<0, 24, u32> z24; -        BitField<24, 8, u32> s8; -    }; -    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); - -    union Z24S8 { -        BitField<0, 8, u32> s8; -        BitField<8, 24, u32> z24; -    }; -    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); - -    S8Z24 s8z24_pixel{}; -    Z24S8 z24s8_pixel{}; -    constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)}; -    for (std::size_t y = 0; y < height; ++y) { -        for (std::size_t x = 0; x < width; ++x) { -            const std::size_t offset{bpp * (y * width + x)}; -            if (reverse) { -                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); -                s8z24_pixel.s8.Assign(z24s8_pixel.s8); -                s8z24_pixel.z24.Assign(z24s8_pixel.z24); -                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); -            } else { -                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); -                z24s8_pixel.s8.Assign(s8z24_pixel.s8); -                z24s8_pixel.z24.Assign(s8z24_pixel.z24); -                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); -            } -        } -    } -} - -/** - * Helper function to perform software conversion (as needed) when loading a buffer from Switch - * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with - * typical desktop GPUs. - */ -static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, -                                               u32 width, u32 height, u32 depth) { -    switch (pixel_format) { -    case PixelFormat::ASTC_2D_4X4: -    case PixelFormat::ASTC_2D_8X8: -    case PixelFormat::ASTC_2D_8X5: -    case PixelFormat::ASTC_2D_5X4: -    case PixelFormat::ASTC_2D_5X5: -    case PixelFormat::ASTC_2D_4X4_SRGB: -    case PixelFormat::ASTC_2D_8X8_SRGB: -    case PixelFormat::ASTC_2D_8X5_SRGB: -    case PixelFormat::ASTC_2D_5X4_SRGB: -    case PixelFormat::ASTC_2D_5X5_SRGB: -    case PixelFormat::ASTC_2D_10X8: -    case PixelFormat::ASTC_2D_10X8_SRGB: { -        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. -        u32 block_width{}; -        u32 block_height{}; -        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); -        data = -            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); -        break; -    } -    case PixelFormat::S8Z24: -        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. -        ConvertS8Z24ToZ24S8(data, width, height, false); -        break; -    } -} - -/** - * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to - * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or - * with typical desktop GPUs. - */ -static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, -                                                u32 width, u32 height) { -    switch (pixel_format) { -    case PixelFormat::ASTC_2D_4X4: -    case PixelFormat::ASTC_2D_8X8: -    case PixelFormat::ASTC_2D_4X4_SRGB: -    case PixelFormat::ASTC_2D_8X8_SRGB: -    case PixelFormat::ASTC_2D_5X5: -    case PixelFormat::ASTC_2D_5X5_SRGB: -    case PixelFormat::ASTC_2D_10X8: -    case PixelFormat::ASTC_2D_10X8_SRGB: { -        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", -                     static_cast<u32>(pixel_format)); -        UNREACHABLE(); -        break; -    } -    case PixelFormat::S8Z24: -        // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. -        ConvertS8Z24ToZ24S8(data, width, height, true); -        break; -    } -} -  MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));  void CachedSurface::LoadGLBuffer() {      MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); @@ -719,8 +622,16 @@ void CachedSurface::LoadGLBuffer() {          }      }      for (u32 i = 0; i < params.max_mip_level; i++) { -        ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), -                                           params.MipHeight(i), params.MipDepth(i)); +        const u32 width = params.MipWidth(i); +        const u32 height = params.MipHeight(i); +        const u32 depth = params.MipDepth(i); +        if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { +            // Reserve size for RGBA8 conversion +            constexpr std::size_t rgba_bpp = 4; +            gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); +        } +        Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, +                                               height, depth, true, true);      }  } @@ -743,8 +654,8 @@ void CachedSurface::FlushGLBuffer() {      glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,                        static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());      glPixelStorei(GL_PACK_ROW_LENGTH, 0); -    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, -                                        params.height); +    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, +                                           params.height, params.depth, true, true);      const u8* const texture_src_data = Memory::GetPointer(params.addr);      ASSERT(texture_src_data);      if (params.is_tiled) { diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index bc50a4876..b508d64e9 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -23,28 +23,12 @@  #include "video_core/textures/astc.h" -class BitStream { +class InputBitStream {  public: -    explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) +    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)          : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} -    ~BitStream() = default; - -    int GetBitsWritten() const { -        return m_BitsWritten; -    } - -    void WriteBitsR(unsigned int val, unsigned int nBits) { -        for (unsigned int i = 0; i < nBits; i++) { -            WriteBit((val >> (nBits - i - 1)) & 1); -        } -    } - -    void WriteBits(unsigned int val, unsigned int nBits) { -        for (unsigned int i = 0; i < nBits; i++) { -            WriteBit((val >> i) & 1); -        } -    } +    ~InputBitStream() = default;      int GetBitsRead() const {          return m_BitsRead; @@ -71,6 +55,38 @@ public:      }  private: +    const int m_NumBits; +    const unsigned char* m_CurByte; +    int m_NextBit = 0; +    int m_BitsRead = 0; + +    bool done = false; +}; + +class OutputBitStream { +public: +    explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) +        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} + +    ~OutputBitStream() = default; + +    int GetBitsWritten() const { +        return m_BitsWritten; +    } + +    void WriteBitsR(unsigned int val, unsigned int nBits) { +        for (unsigned int i = 0; i < nBits; i++) { +            WriteBit((val >> (nBits - i - 1)) & 1); +        } +    } + +    void WriteBits(unsigned int val, unsigned int nBits) { +        for (unsigned int i = 0; i < nBits; i++) { +            WriteBit((val >> i) & 1); +        } +    } + +private:      void WriteBit(int b) {          if (done) @@ -238,8 +254,8 @@ public:      // Fills result with the values that are encoded in the given      // bitstream. We must know beforehand what the maximum possible      // value is, and how many values we're decoding. -    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, -                                      uint32_t maxRange, uint32_t nValues) { +    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, +                                      InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {          // Determine encoding parameters          IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); @@ -267,7 +283,7 @@ public:      }  private: -    static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, +    static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,                                  uint32_t nBitsPerValue) {          // Implement the algorithm in section C.2.12          uint32_t m[5]; @@ -327,7 +343,7 @@ private:          }      } -    static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, +    static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,                                   uint32_t nBitsPerValue) {          // Implement the algorithm in section C.2.12          uint32_t m[3]; @@ -406,7 +422,7 @@ struct TexelWeightParams {      }  }; -static TexelWeightParams DecodeBlockInfo(BitStream& strm) { +static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {      TexelWeightParams params;      // Read the entire block mode all at once @@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {      return params;  } -static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, +static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,                                uint32_t blockHeight) {      // Don't actually care about the void extent, just read the bits...      for (int i = 0; i < 4; ++i) { @@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode      // We now have enough to decode our integer sequence.      std::vector<IntegerEncodedValue> decodedColorValues; -    BitStream colorStream(data); +    InputBitStream colorStream(data);      IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);      // Once we have the decoded values, we need to dequantize them to the 0-255 range @@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue  #undef READ_INT_VALUES  } -static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, +static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,                              const uint32_t blockHeight, uint32_t* outBuf) { -    BitStream strm(inBuf); +    InputBitStream strm(inBuf);      TexelWeightParams weightParams = DecodeBlockInfo(strm);      // Was there an error? @@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,      // Define color data.      uint8_t colorEndpointData[16];      memset(colorEndpointData, 0, sizeof(colorEndpointData)); -    BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); +    OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);      // Read extra config data...      uint32_t baseCEM = 0; @@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,      memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);      std::vector<IntegerEncodedValue> texelWeightValues; -    BitStream weightStream(texelWeightData); +    InputBitStream weightStream(texelWeightData);      IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,                                                 weightParams.m_MaxWeight, @@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,  namespace Tegra::Texture::ASTC { -std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, +std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,                                  uint32_t depth, uint32_t block_width, uint32_t block_height) {      uint32_t blockIdx = 0;      std::vector<uint8_t> outData(height * width * depth * 4); @@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint          for (uint32_t j = 0; j < height; j += block_height) {              for (uint32_t i = 0; i < width; i += block_width) { -                uint8_t* blockPtr = data.data() + blockIdx * 16; +                const uint8_t* blockPtr = data + blockIdx * 16;                  // Blocks can be at most 12x12                  uint32_t uncompData[144]; diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index d419dd025..991cdba72 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -9,7 +9,7 @@  namespace Tegra::Texture::ASTC { -std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, +std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,                                  uint32_t depth, uint32_t block_width, uint32_t block_height);  } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp new file mode 100644 index 000000000..5e439f036 --- /dev/null +++ b/src/video_core/textures/convert.cpp @@ -0,0 +1,92 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstring> +#include <tuple> +#include <vector> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/textures/astc.h" +#include "video_core/textures/convert.h" + +namespace Tegra::Texture { + +using VideoCore::Surface::PixelFormat; + +template <bool reverse> +void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { +    union S8Z24 { +        BitField<0, 24, u32> z24; +        BitField<24, 8, u32> s8; +    }; +    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); + +    union Z24S8 { +        BitField<0, 8, u32> s8; +        BitField<8, 24, u32> z24; +    }; +    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); + +    S8Z24 s8z24_pixel{}; +    Z24S8 z24s8_pixel{}; +    constexpr auto bpp{ +        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; +    for (std::size_t y = 0; y < height; ++y) { +        for (std::size_t x = 0; x < width; ++x) { +            const std::size_t offset{bpp * (y * width + x)}; +            if constexpr (reverse) { +                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); +                s8z24_pixel.s8.Assign(z24s8_pixel.s8); +                s8z24_pixel.z24.Assign(z24s8_pixel.z24); +                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); +            } else { +                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); +                z24s8_pixel.s8.Assign(s8z24_pixel.s8); +                z24s8_pixel.z24.Assign(s8z24_pixel.z24); +                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); +            } +        } +    } +} + +static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { +    SwapS8Z24ToZ24S8<false>(data, width, height); +} + +static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { +    SwapS8Z24ToZ24S8<true>(data, width, height); +} + +void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, +                            bool convert_astc, bool convert_s8z24) { +    if (convert_astc && IsPixelFormatASTC(pixel_format)) { +        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. +        u32 block_width{}; +        u32 block_height{}; +        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); +        const std::vector<u8> rgba8_data = +            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); +        std::copy(rgba8_data.begin(), rgba8_data.end(), data); + +    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { +        Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); +    } +} + +void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, +                            bool convert_astc, bool convert_s8z24) { +    if (convert_astc && IsPixelFormatASTC(pixel_format)) { +        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", +                     static_cast<u32>(pixel_format)); +        UNREACHABLE(); + +    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { +        Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); +    } +} + +} // namespace Tegra::Texture
\ No newline at end of file diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h new file mode 100644 index 000000000..07cd8b5da --- /dev/null +++ b/src/video_core/textures/convert.h @@ -0,0 +1,18 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "video_core/surface.h" + +namespace Tegra::Texture { + +void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, +                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24); + +void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, +                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24); + +} // namespace Tegra::Texture
\ No newline at end of file diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 150612aed..cad7340f5 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool              for (u32 xb = 0; xb < blocks_on_x; xb++) {                  const u32 x_start = xb * block_x_elements;                  const u32 x_end = std::min(width, x_start + block_x_elements); -                if (fast) { +                if constexpr (fast) {                      FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,                                       z_start, x_end, y_end, z_end, tile_offset, xy_block_size,                                       layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | 
