diff options
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 48 | 
1 files changed, 22 insertions, 26 deletions
| diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index a814ef483..b84ddd67d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -116,8 +116,6 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] =             237, 239, 241, 243, 245, 247, 249, 251, 253, 255);  // Input ASTC texture globals -uint current_index = 0; -int bitsread = 0;  int total_bitsread = 0;  uvec4 local_buff; @@ -144,13 +142,6 @@ int texel_vector_index = 0;  uint unquantized_texel_weights[2][144]; -uint SwizzleOffset(uvec2 pos) { -    uint x = pos.x; -    uint y = pos.y; -    return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + -                          (y % 2) * 16 + (x % 16); -} -  // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]  // is the same as [(num_bits - 1):0] and repeats all the way down.  uint Replicate(uint val, uint num_bits, uint to_bit) { @@ -1224,33 +1215,40 @@ void DecompressBlock(ivec3 coord) {              uint local_partition = 0;              if (num_partitions > 1) {                  local_partition = Select2DPartition(partition_index, i, j, num_partitions, -                                                     (block_dims.y * block_dims.x) < 32); +                                                    (block_dims.y * block_dims.x) < 32);              } -            vec4 p; -            uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); -            uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); -            uvec4 plane_vec = uvec4(0); -            uvec4 weight_vec = uvec4(0); -            for (uint c = 0; c < 4; c++) { -                if (params.dual_plane && (((plane_index + 1) & 3) == c)) { -                    plane_vec[c] = 1; +            const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); +            const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); +     
       const uint weight_offset = (j * block_dims.x + i); +            const uint primary_weight = unquantized_texel_weights[0][weight_offset]; +            uvec4 weight_vec = uvec4(primary_weight); +            if (params.dual_plane) { +                const uint secondary_weight = unquantized_texel_weights[1][weight_offset]; +                for (uint c = 0; c < 4; c++) { +                    const bool is_secondary = ((plane_index + 1u) & 3u) == c; +                    weight_vec[c] = is_secondary ? secondary_weight : primary_weight;                  } -                weight_vec[c] = unquantized_texel_weights[plane_vec[c]][j * block_dims.x + i];              } -            vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); -            p = (Cf / 65535.0); +            const vec4 Cf = +                vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); +            const vec4 p = (Cf / 65535.0);              imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);          }      }  } + +uint SwizzleOffset(uvec2 pos) { +    uint x = pos.x; +    uint y = pos.y; +    return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + +                          (y % 2) * 16 + (x % 16); +} +  void main() {      uvec3 pos = gl_GlobalInvocationID;      pos.x <<= BYTES_PER_BLOCK_LOG2; - -    // Read as soon as possible due to its latency      const uint swizzle = SwizzleOffset(pos.xy); -      const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;      uint offset = 0; @@ -1264,8 +1262,6 @@ void main() {      if (any(greaterThanEqual(coord, imageSize(dest_image)))) {          return;      } -    current_index = 0; -    bitsread = 0;      local_buff = astc_data[offset / 16];      DecompressBlock(coord);  } | 
