diff options
| author | raven02 <jacky.kktsui@yahoo.com.hk> | 2018-09-19 19:53:11 +0800 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-09-19 19:53:11 +0800 | 
| commit | c8f9bbbf859c0e38cf691b64c67761382fcebfc2 (patch) | |
| tree | 99529c2277a6b740a6e278985c5147fa649c5497 /src/video_core | |
| parent | b91f7d5d67a67115926ad03526f71a7cc3dfb326 (diff) | |
| parent | b33ce787b7959e1bfd3b5ae4886b6e137fb97711 (diff) | |
Merge branch 'master' into tlds
Diffstat (limited to 'src/video_core')
24 files changed, 651 insertions, 195 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4a79ce39c..f5ae57039 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(video_core STATIC      engines/maxwell_dma.cpp      engines/maxwell_dma.h      engines/shader_bytecode.h +    engines/shader_header.h      gpu.cpp      gpu.h      macro_interpreter.cpp diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index dcf9ef8b9..021b83eaa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -26,7 +26,7 @@ public:      void WriteReg(u32 method, u32 value);      struct Regs { -        static constexpr size_t NUM_REGS = 0x258; +        static constexpr std::size_t NUM_REGS = 0x258;          struct Surface {              RenderTargetFormat format; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 329079ddd..8afd26fe9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() {  void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {      // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 
-    auto& shader = state.shader_stages[static_cast<size_t>(stage)]; -    auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)]; +    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; +    auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];      auto& buffer = shader.const_buffers[bind_data.index]; @@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {  std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {      std::vector<Texture::FullTextureInfo> textures; -    auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)]; +    auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];      auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];      ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);      GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;      // Offset into the texture constbuffer where the texture info begins. 
-    static constexpr size_t TextureInfoOffset = 0x20; +    static constexpr std::size_t TextureInfoOffset = 0x20;      for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;           current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { @@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt      return textures;  } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const { -    auto& shader = state.shader_stages[static_cast<size_t>(stage)]; +Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, +                                                    std::size_t offset) const { +    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];      auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];      ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d3be900a4..b81b0723d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -34,17 +34,17 @@ public:      /// Register structure of the Maxwell3D engine.      /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.      
struct Regs { -        static constexpr size_t NUM_REGS = 0xE00; - -        static constexpr size_t NumRenderTargets = 8; -        static constexpr size_t NumViewports = 16; -        static constexpr size_t NumCBData = 16; -        static constexpr size_t NumVertexArrays = 32; -        static constexpr size_t NumVertexAttributes = 32; -        static constexpr size_t MaxShaderProgram = 6; -        static constexpr size_t MaxShaderStage = 5; +        static constexpr std::size_t NUM_REGS = 0xE00; + +        static constexpr std::size_t NumRenderTargets = 8; +        static constexpr std::size_t NumViewports = 16; +        static constexpr std::size_t NumCBData = 16; +        static constexpr std::size_t NumVertexArrays = 32; +        static constexpr std::size_t NumVertexAttributes = 32; +        static constexpr std::size_t MaxShaderProgram = 6; +        static constexpr std::size_t MaxShaderStage = 5;          // Maximum number of const buffers per shader stage. -        static constexpr size_t MaxConstBuffers = 18; +        static constexpr std::size_t MaxConstBuffers = 18;          enum class QueryMode : u32 {              Write = 0, @@ -443,9 +443,9 @@ public:              }          }; -        bool IsShaderConfigEnabled(size_t index) const { +        bool IsShaderConfigEnabled(std::size_t index) const {              // The VertexB is always enabled. 
-            if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) { +            if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {                  return true;              }              return shader_config[index].enable != 0; @@ -571,7 +571,7 @@ public:                          BitField<25, 3, u32> map_7;                      }; -                    u32 GetMap(size_t index) const { +                    u32 GetMap(std::size_t index) const {                          const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,                                                                       map_4, map_5, map_6, map_7};                          ASSERT(index < maps.size()); @@ -925,7 +925,7 @@ public:      std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;      /// Returns the texture information for a specific texture in a specific shader stage. -    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; +    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;  private:      VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c24d33d5c..aa7481b8c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() {      ASSERT(regs.dst_params.pos_y == 0);      if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { -        size_t copy_size = regs.x_count; +        std::size_t copy_size = regs.x_count;          // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D          // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). 
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 7882f16e0..311ccb616 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -23,7 +23,7 @@ public:      void WriteReg(u32 method, u32 value);      struct Regs { -        static constexpr size_t NUM_REGS = 0x1D6; +        static constexpr std::size_t NUM_REGS = 0x1D6;          struct Parameters {              union { diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6e2397f2..7e1de0fa1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -20,10 +20,10 @@ namespace Tegra::Shader {  struct Register {      /// Number of registers -    static constexpr size_t NumRegisters = 256; +    static constexpr std::size_t NumRegisters = 256;      /// Register 255 is special cased to always be 0 -    static constexpr size_t ZeroIndex = 255; +    static constexpr std::size_t ZeroIndex = 255;      enum class Size : u64 {          Byte = 0, @@ -240,6 +240,41 @@ enum class FlowCondition : u64 {      Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?  }; +enum class ControlCode : u64 { +    F = 0, +    LT = 1, +    EQ = 2, +    LE = 3, +    GT = 4, +    NE = 5, +    GE = 6, +    Num = 7, +    Nan = 8, +    LTU = 9, +    EQU = 10, +    LEU = 11, +    GTU = 12, +    NEU = 13, +    GEU = 14, +    // +    OFF = 16, +    LO = 17, +    SFF = 18, +    LS = 19, +    HI = 20, +    SFT = 21, +    HS = 22, +    OFT = 23, +    CSM_TA = 24, +    CSM_TR = 25, +    CSM_MX = 26, +    FCSM_TA = 27, +    FCSM_TR = 28, +    FCSM_MX = 29, +    RLE = 30, +    RGT = 31, +}; +  enum class PredicateResultMode : u64 {      None = 0x0,      NotZero = 0x3, @@ -271,6 +306,15 @@ enum class TextureProcessMode : u64 {      LLA = 7  // Load LOD. 
The A is unknown, does not appear to differ with LL  }; +enum class TextureMiscMode : u64 { +    DC, +    AOFFI, // Uses Offset +    NDV, +    NODEP, +    MZ, +    PTP, +}; +  enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };  enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; @@ -546,6 +590,15 @@ union Instruction {      } pset;      union { +        BitField<0, 3, u64> pred0; +        BitField<3, 3, u64> pred3; +        BitField<8, 5, ControlCode> cc; // flag in cc +        BitField<39, 3, u64> pred39; +        BitField<42, 1, u64> neg_pred39; +        BitField<45, 4, PredOperation> op; // op with pred39 +    } csetp; + +    union {          BitField<39, 3, u64> pred39;          BitField<42, 1, u64> neg_pred;          BitField<43, 1, u64> neg_a; @@ -590,42 +643,127 @@ union Instruction {          BitField<28, 1, u64> array;          BitField<29, 2, TextureType> texture_type;          BitField<31, 4, u64> component_mask; +        BitField<49, 1, u64> nodep_flag; +        BitField<50, 1, u64> dc_flag; +        BitField<54, 1, u64> aoffi_flag;          BitField<55, 3, TextureProcessMode> process_mode; -        bool IsComponentEnabled(size_t component) const { +        bool IsComponentEnabled(std::size_t component) const {              return ((1ull << component) & component_mask) != 0;          } + +        TextureProcessMode GetTextureProcessMode() const { +            return process_mode; +        } + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::DC: +                return dc_flag != 0; +            case TextureMiscMode::NODEP: +                return nodep_flag != 0; +            case TextureMiscMode::AOFFI: +                return aoffi_flag != 0; +            default: +                break; +            } +            return false; +        }      } tex;      union {          BitField<22, 6, TextureQueryType> query_type;          
BitField<31, 4, u64> component_mask; +        BitField<49, 1, u64> nodep_flag; + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::NODEP: +                return nodep_flag != 0; +            default: +                break; +            } +            return false; +        }      } txq;      union {          BitField<28, 1, u64> array;          BitField<29, 2, TextureType> texture_type;          BitField<31, 4, u64> component_mask; +        BitField<35, 1, u64> ndv_flag; +        BitField<49, 1, u64> nodep_flag; -        bool IsComponentEnabled(size_t component) const { +        bool IsComponentEnabled(std::size_t component) const {              return ((1ull << component) & component_mask) != 0;          } + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::NDV: +                return (ndv_flag != 0); +            case TextureMiscMode::NODEP: +                return (nodep_flag != 0); +            default: +                break; +            } +            return false; +        }      } tmml;      union {          BitField<28, 1, u64> array;          BitField<29, 2, TextureType> texture_type; +        BitField<35, 1, u64> ndv_flag; +        BitField<49, 1, u64> nodep_flag; +        BitField<50, 1, u64> dc_flag; +        BitField<54, 2, u64> info;          BitField<56, 2, u64> component; + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::NDV: +                return ndv_flag != 0; +            case TextureMiscMode::NODEP: +                return nodep_flag != 0; +            case TextureMiscMode::DC: +                return dc_flag != 0; +            case TextureMiscMode::AOFFI: +                return info == 1; +            case TextureMiscMode::PTP: +                return info == 2; +            default: +                break; +            } +   
         return false; +        }      } tld4;      union { +        BitField<49, 1, u64> nodep_flag; +        BitField<50, 1, u64> dc_flag; +        BitField<51, 1, u64> aoffi_flag;          BitField<52, 2, u64> component; + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::DC: +                return dc_flag != 0; +            case TextureMiscMode::NODEP: +                return nodep_flag != 0; +            case TextureMiscMode::AOFFI: +                return aoffi_flag != 0; +            default: +                break; +            } +            return false; +        }      } tld4s;      union {          BitField<0, 8, Register> gpr0;          BitField<28, 8, Register> gpr28; -        BitField<49, 1, u64> nodep; +        BitField<49, 1, u64> nodep_flag;          BitField<50, 3, u64> component_mask_selector;          BitField<53, 4, u64> texture_info; @@ -645,6 +783,37 @@ union Instruction {              UNREACHABLE();          } +        TextureProcessMode GetTextureProcessMode() const { +            switch (texture_info) { +            case 0: +            case 2: +            case 6: +            case 8: +            case 9: +            case 11: +                return TextureProcessMode::LZ; +            case 3: +            case 5: +            case 13: +                return TextureProcessMode::LL; +            default: +                break; +            } +            return TextureProcessMode::None; +        } + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::DC: +                return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; +            case TextureMiscMode::NODEP: +                return nodep_flag != 0; +            default: +                break; +            } +            return false; +        } +          bool IsArrayTexture() const {              // TEXS only supports 
Texture2D arrays.              return texture_info >= 7 && texture_info <= 9; @@ -654,7 +823,7 @@ union Instruction {              return gpr28.Value() != Register::ZeroIndex;          } -        bool IsComponentEnabled(size_t component) const { +        bool IsComponentEnabled(std::size_t component) const {              static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{                  {},                  {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, @@ -662,7 +831,7 @@ union Instruction {                  {0x7, 0xb, 0xd, 0xe, 0xf},              }}; -            size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; +            std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};              index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;              u32 mask = mask_lut[index][component_mask_selector]; @@ -673,6 +842,7 @@ union Instruction {      } texs;      union { +        BitField<49, 1, u64> nodep_flag;          BitField<53, 4, u64> texture_info;          TextureType GetTextureType() const { @@ -693,6 +863,26 @@ union Instruction {              UNREACHABLE();          } +        TextureProcessMode GetTextureProcessMode() const { +            if (texture_info == 1 || texture_info == 5 || texture_info == 12) +                return TextureProcessMode::LL; +            return TextureProcessMode::LZ; +        } + +        bool UsesMiscMode(TextureMiscMode mode) const { +            switch (mode) { +            case TextureMiscMode::AOFFI: +                return texture_info == 12 || texture_info == 4; +            case TextureMiscMode::MZ: +                return texture_info == 5; +            case TextureMiscMode::NODEP: +                return nodep_flag != 0; +            default: +                break; +            } +            return false; +        } +          bool IsArrayTexture() const {              // TEXS only supports Texture2D arrays.              
return texture_info == 8; @@ -735,6 +925,7 @@ union Instruction {          BitField<36, 5, u64> index;      } cbuf36; +    BitField<47, 1, u64> generates_cc;      BitField<61, 1, u64> is_b_imm;      BitField<60, 1, u64> is_b_gpr;      BitField<59, 1, u64> is_c_gpr; @@ -859,6 +1050,7 @@ public:          ISET_IMM,          PSETP,          PSET, +        CSETP,          XMAD_IMM,          XMAD_CR,          XMAD_RC, @@ -947,7 +1139,7 @@ public:  private:      struct Detail {      private: -        static constexpr size_t opcode_bitsize = 16; +        static constexpr std::size_t opcode_bitsize = 16;          /**           * Generates the mask and the expected value after masking from a given bitstring. @@ -956,8 +1148,8 @@ private:           */          static auto GetMaskAndExpect(const char* const bitstring) {              u16 mask = 0, expect = 0; -            for (size_t i = 0; i < opcode_bitsize; i++) { -                const size_t bit_position = opcode_bitsize - i - 1; +            for (std::size_t i = 0; i < opcode_bitsize; i++) { +                const std::size_t bit_position = opcode_bitsize - i - 1;                  switch (bitstring[i]) {                  case '0':                      mask |= 1 << bit_position; @@ -1095,6 +1287,7 @@ private:              INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),              INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),              INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), +            INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),              INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),              INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),              INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h new file mode 100644 index 000000000..a885ee3cf --- /dev/null 
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::Shader {
+
+enum class OutputTopology : u32 {
+    PointList = 1,
+    LineStrip = 6,
+    TriangleStrip = 7,
+};
+
+// Bit-level layout of the shader program header (0x50 bytes, asserted below). Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+struct Header {
+    union {
+        BitField<0, 5, u32> sph_type;
+        BitField<5, 5, u32> version;
+        BitField<10, 4, u32> shader_type;
+        BitField<14, 1, u32> mrt_enable;
+        BitField<15, 1, u32> kills_pixels;
+        BitField<16, 1, u32> does_global_store;
+        BitField<17, 4, u32> sass_version;
+        BitField<21, 5, u32> reserved;
+        BitField<26, 1, u32> does_load_or_store;
+        BitField<27, 1, u32> does_fp64;
+        BitField<28, 4, u32> stream_out_mask;
+    } common0;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_low_size;
+        BitField<24, 8, u32> per_patch_attribute_count;
+    } common1;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_high_size;
+        BitField<24, 8, u32> threads_per_input_primitive;
+    } common2;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_crs_size;
+        BitField<24, 4, OutputTopology> output_topology;
+        BitField<28, 4, u32> reserved;
+    } common3;
+
+    union {
+        BitField<0, 12, u32> max_output_vertices;
+        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;        // Gap between the two store_req fields.
+        BitField<24, 8, u32> store_req_end;   // NOTE: not used by geometry shaders.
+    } common4;
+
+    union {
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // ImapReserved
+            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB
+            INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // OmapColor
+            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // OmapReserved
+        } vtg;
+
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(2);  // ImapReserved
+            struct {
+                u32 target;
+                union {
+                    BitField<0, 1, u32> sample_mask;
+                    BitField<1, 1, u32> depth;
+                    BitField<2, 30, u32> reserved;
+                };
+            } omap;
+            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
+                const u32 bit = render_target * 4 + component; // Four target bits per render target.
+                return (omap.target & (1u << bit)) != 0;
+            }
+        } ps;
+    };
+};
+
+static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
+
+} // namespace Tegra::Shader
diff --git a/src/video_core/gpu.h
b/src/video_core/gpu.h index 7329ca766..5cc1e19ca 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 {      R32_UINT = 0xE4,      R32_FLOAT = 0xE5,      B5G6R5_UNORM = 0xE8, +    BGR5A1_UNORM = 0xE9,      RG8_UNORM = 0xEA,      RG8_SNORM = 0xEB,      R16_UNORM = 0xEE, diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 7d836b816..cee0baaf3 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -152,7 +152,7 @@ private:      boost::optional<u32>          delayed_pc; ///< Program counter to execute at after the delay slot is executed. -    static constexpr size_t NumMacroRegisters = 8; +    static constexpr std::size_t NumMacroRegisters = 8;      /// General purpose macro registers.      std::array<u32, NumMacroRegisters> registers = {}; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0b5d18bcb..578aca789 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -12,10 +12,10 @@  namespace OpenGL { -OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} +OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} -GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment, -                                      bool cache) { +GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, +                                      std::size_t alignment, bool cache) {      auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();      const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; @@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz      return uploaded_offset;  } -GLintptr 
OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) { +GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, +                                          std::size_t alignment) {      AlignBuffer(alignment);      std::memcpy(buffer_ptr, raw_pointer, size);      GLintptr uploaded_offset = buffer_offset; @@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size,      return uploaded_offset;  } -void OGLBufferCache::Map(size_t max_size) { +void OGLBufferCache::Map(std::size_t max_size) {      bool invalidate;      std::tie(buffer_ptr, buffer_offset_base, invalidate) =          stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); @@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const {      return stream_buffer.GetHandle();  } -void OGLBufferCache::AlignBuffer(size_t alignment) { +void OGLBufferCache::AlignBuffer(std::size_t alignment) {      // Align the offset, not the mapped pointer      GLintptr offset_aligned = -        static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); +        static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));      buffer_ptr += offset_aligned - buffer_offset;      buffer_offset = offset_aligned;  } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6da862902..6c18461f4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -19,32 +19,32 @@ struct CachedBufferEntry final {          return addr;      } -    size_t GetSizeInBytes() const { +    std::size_t GetSizeInBytes() const {          return size;      }      VAddr addr; -    size_t size; +    std::size_t size;      GLintptr offset; -    size_t alignment; +    std::size_t alignment;  };  class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> 
{  public: -    explicit OGLBufferCache(size_t size); +    explicit OGLBufferCache(std::size_t size); -    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4, +    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,                            bool cache = true); -    GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4); +    GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); -    void Map(size_t max_size); +    void Map(std::size_t max_size);      void Unmap();      GLuint GetHandle() const;  protected: -    void AlignBuffer(size_t alignment); +    void AlignBuffer(std::size_t alignment);  private:      OGLStreamBuffer stream_buffer; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7e1bba67d..274c2dbcf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100,  RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)      : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {      // Create sampler objects -    for (size_t i = 0; i < texture_samplers.size(); ++i) { +    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {          texture_samplers[i].Create();          state.texture_units[i].sampler = texture_samplers[i].sampler.handle;      } @@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() {      u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;      u32 current_texture_bindpoint = 0; -    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { +    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {          const auto& shader_config = 
gpu.regs.shader_config[index];          const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() {              continue;          } -        const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 +        const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5          GLShader::MaxwellUniformData ubo{};          ubo.SetFromRegs(gpu.state.shader_stages[stage]);          const GLintptr offset = buffer_cache.UploadHostMemory( -            &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment)); +            &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));          // Bind the buffer          glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); @@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() {      shader_program_manager->UseTrivialGeometryShader();  } -size_t RasterizerOpenGL::CalculateVertexArraysSize() const { +std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {      const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; -    size_t size = 0; +    std::size_t size = 0;      for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {          if (!regs.vertex_array[index].IsEnabled())              continue; @@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {  void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,                                               bool preserve_contents, -                                             boost::optional<size_t> single_color_target) { +                                             boost::optional<std::size_t> single_color_target) {      MICROPROFILE_SCOPE(OpenGL_Framebuffer);      const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; @@ -330,7 +330,7 @@ void 
RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep          } else {              // Multiple color attachments are enabled              std::array<GLenum, Maxwell::NumRenderTargets> buffers; -            for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { +            for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {                  Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);                  buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);                  glFramebufferTexture2D( @@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep          }      } else {          // No color attachments are enabled - zero out all of them -        for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { +        for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {              glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,                                     GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,                                     0, 0); @@ -462,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() {      state.draw.vertex_buffer = buffer_cache.GetHandle();      state.Apply(); -    size_t buffer_size = CalculateVertexArraysSize(); +    std::size_t buffer_size = CalculateVertexArraysSize();      if (is_indexed) { -        buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; +        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;      }      // Uniform space for the 5 shader stages      buffer_size = -        Common::AlignUp<size_t>(buffer_size, 4) + +        Common::AlignUp<std::size_t>(buffer_size, 4) +          (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;      // Add space for at least 18 constant buffers @@ -644,7 +644,7 @@ u32 
RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad      MICROPROFILE_SCOPE(OpenGL_UBO);      const auto& gpu = Core::System::GetInstance().GPU();      const auto& maxwell3d = gpu.Maxwell3D(); -    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; +    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];      const auto& entries = shader->GetShaderEntries().const_buffer_entries;      constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; @@ -667,7 +667,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad              continue;          } -        size_t size = 0; +        std::size_t size = 0;          if (used_buffer.IsIndirect()) {              // Buffer is accessed indirectly, so upload the entire thing @@ -689,7 +689,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad          ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");          GLintptr const_buffer_offset = buffer_cache.UploadMemory( -            buffer.address, size, static_cast<size_t>(uniform_buffer_alignment)); +            buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));          // Now configure the bindpoint of the buffer inside the shader          glUniformBlockBinding(shader->GetProgramHandle(), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 163412882..bf9560bdc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -73,7 +73,7 @@ public:      };      /// Maximum supported size that a constbuffer can have in bytes. 
-    static constexpr size_t MaxConstbufferSize = 0x10000; +    static constexpr std::size_t MaxConstbufferSize = 0x10000;      static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,                    "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); @@ -106,7 +106,7 @@ private:       */      void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,                                 bool preserve_contents = true, -                               boost::optional<size_t> single_color_target = {}); +                               boost::optional<std::size_t> single_color_target = {});      /*       * Configures the current constbuffers to use for the draw command. @@ -180,12 +180,12 @@ private:      std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; -    static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; +    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;      OGLBufferCache buffer_cache;      OGLFramebuffer framebuffer;      GLint uniform_buffer_alignment; -    size_t CalculateVertexArraysSize() const; +    std::size_t CalculateVertexArraysSize() const;      void SetupVertexArrays(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 32001e44b..86682d7cb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {      return params;  } -/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) { +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {      const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};      SurfaceParams params{};      params.addr = TryGetCpuAddr(config.Address()); @@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, 
SurfaceParams::MaxPixelFormat> tex_form      {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                                // RG8S      {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // RG32UI      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // R32UI +    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8      // Depth formats      {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F @@ -203,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {  }  static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { -    ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); +    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());      auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];      ASSERT(component_type == format.component_type); @@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType  static bool IsPixelFormatASTC(PixelFormat format) {      switch (format) {      case PixelFormat::ASTC_2D_4X4: +    case PixelFormat::ASTC_2D_8X8:          return true;      default:          return false; @@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {      switch (format) {      case PixelFormat::ASTC_2D_4X4:          return {4, 4}; +    case PixelFormat::ASTC_2D_8X8: +        return {8, 8};      default:          LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));          UNREACHABLE(); @@ -256,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) {  }  template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size, +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t 
gl_buffer_size,                  VAddr addr) {      constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;      constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); @@ -267,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t          const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};          const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(              addr, tile_size, bytes_per_pixel, stride, height, block_height); -        const size_t size_to_copy{std::min(gl_buffer_size, data.size())}; +        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};          memcpy(gl_buffer, data.data(), size_to_copy);      } else {          // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should @@ -278,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t      }  } -static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),                              SurfaceParams::MaxPixelFormat>      morton_to_gl_fns = {          // clang-format off @@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),          MortonCopy<true, PixelFormat::RG8S>,          MortonCopy<true, PixelFormat::RG32UI>,          MortonCopy<true, PixelFormat::R32UI>, +        MortonCopy<true, PixelFormat::ASTC_2D_8X8>,          MortonCopy<true, PixelFormat::Z32F>,          MortonCopy<true, PixelFormat::Z16>,          MortonCopy<true, PixelFormat::Z24S8>, @@ -335,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),          // clang-format on  }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),                              SurfaceParams::MaxPixelFormat>      
gl_to_morton_fns = {          // clang-format off @@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),          MortonCopy<false, PixelFormat::RG8S>,          MortonCopy<false, PixelFormat::RG32UI>,          MortonCopy<false, PixelFormat::R32UI>, +        nullptr,          MortonCopy<false, PixelFormat::Z32F>,          MortonCopy<false, PixelFormat::Z16>,          MortonCopy<false, PixelFormat::Z24S8>, @@ -513,9 +519,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {      S8Z24 input_pixel{};      Z24S8 output_pixel{};      constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; -    for (size_t y = 0; y < height; ++y) { -        for (size_t x = 0; x < width; ++x) { -            const size_t offset{bpp * (y * width + x)}; +    for (std::size_t y = 0; y < height; ++y) { +        for (std::size_t x = 0; x < width; ++x) { +            const std::size_t offset{bpp * (y * width + x)};              std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));              output_pixel.s8.Assign(input_pixel.s8);              output_pixel.z24.Assign(input_pixel.z24); @@ -526,9 +532,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {  static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {      constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; -    for (size_t y = 0; y < height; ++y) { -        for (size_t x = 0; x < width; ++x) { -            const size_t offset{bpp * (y * width + x)}; +    for (std::size_t y = 0; y < height; ++y) { +        for (std::size_t x = 0; x < width; ++x) { +            const std::size_t offset{bpp * (y * width + x)};              const u8 temp{data[offset]};              data[offset] = data[offset + 1];              data[offset + 1] = temp; @@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {  static void 
ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,                                                 u32 width, u32 height) {      switch (pixel_format) { -    case PixelFormat::ASTC_2D_4X4: { +    case PixelFormat::ASTC_2D_4X4: +    case PixelFormat::ASTC_2D_8X8: {          // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.          u32 block_width{};          u32 block_height{}; @@ -591,13 +598,13 @@ void CachedSurface::LoadGLBuffer() {              UNREACHABLE();          } -        gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); -        morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( +        gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size); +        morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](              params.width, params.block_height, params.height, gl_buffer.data(), copy_size,              params.addr);      } else {          const u8* const texture_src_data_end{texture_src_data + -                                             (static_cast<size_t>(params.depth) * copy_size)}; +                                             (static_cast<std::size_t>(params.depth) * copy_size)};          gl_buffer.assign(texture_src_data, texture_src_data_end);      } @@ -616,7 +623,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle      MICROPROFILE_SCOPE(OpenGL_TextureUL); -    ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height * +    ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *                                     GetGLBytesPerPixel(params.pixel_format) * params.depth);      const auto& rect{params.GetRect()}; @@ -624,8 +631,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle      // Load data from memory to the surface      const GLint x0 = static_cast<GLint>(rect.left);      const GLint y0 = 
static_cast<GLint>(rect.bottom); -    const size_t buffer_offset = -        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * +    const std::size_t buffer_offset = +        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width + +                                 static_cast<std::size_t>(x0)) *          GetGLBytesPerPixel(params.pixel_format);      const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); @@ -727,7 +735,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {      return GetSurface(depth_params, preserve_contents);  } -Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) { +Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {      const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};      ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); @@ -825,7 +833,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,          auto source_format = GetFormatTuple(params.pixel_format, params.component_type);          auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); -        size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); +        std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());          glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);          glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); @@ -849,7 +857,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,                  LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "                                    "reinterpretation but the texture is tiled.");              } -            size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); +            std::size_t 
remaining_size = new_params.SizeInBytes() - params.SizeInBytes();              std::vector<u8> data(remaining_size);              Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());              glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 57ea8593b..d7a4bc37f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -70,19 +70,20 @@ struct SurfaceParams {          RG8S = 42,          RG32UI = 43,          R32UI = 44, +        ASTC_2D_8X8 = 45,          MaxColorFormat,          // Depth formats -        Z32F = 45, -        Z16 = 46, +        Z32F = 46, +        Z16 = 47,          MaxDepthFormat,          // DepthStencil formats -        Z24S8 = 47, -        S8Z24 = 48, -        Z32FS8 = 49, +        Z24S8 = 48, +        S8Z24 = 49, +        Z32FS8 = 50,          MaxDepthStencilFormat, @@ -90,7 +91,7 @@ struct SurfaceParams {          Invalid = 255,      }; -    static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); +    static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);      enum class ComponentType {          Invalid = 0, @@ -192,6 +193,7 @@ struct SurfaceParams {              1, // RG8S              1, // RG32UI              1, // R32UI +            4, // ASTC_2D_8X8              1, // Z32F              1, // Z16              1, // Z24S8 @@ -199,8 +201,8 @@ struct SurfaceParams {              1, // Z32FS8          }}; -        ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); -        return compression_factor_table[static_cast<size_t>(format)]; +        ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); +        return compression_factor_table[static_cast<std::size_t>(format)];      }      static 
constexpr u32 GetFormatBpp(PixelFormat format) { @@ -253,6 +255,7 @@ struct SurfaceParams {              16,  // RG8S              64,  // RG32UI              32,  // R32UI +            16,  // ASTC_2D_8X8              32,  // Z32F              16,  // Z16              32,  // Z24S8 @@ -260,8 +263,8 @@ struct SurfaceParams {              64,  // Z32FS8          }}; -        ASSERT(static_cast<size_t>(format) < bpp_table.size()); -        return bpp_table[static_cast<size_t>(format)]; +        ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); +        return bpp_table[static_cast<std::size_t>(format)];      }      u32 GetFormatBpp() const { @@ -316,6 +319,8 @@ struct SurfaceParams {              return PixelFormat::R11FG11FB10F;          case Tegra::RenderTargetFormat::B5G6R5_UNORM:              return PixelFormat::B5G6R5U; +        case Tegra::RenderTargetFormat::BGR5A1_UNORM: +            return PixelFormat::A1B5G5R5U;          case Tegra::RenderTargetFormat::RGBA32_UINT:              return PixelFormat::RGBA32UI;          case Tegra::RenderTargetFormat::R8_UNORM: @@ -522,6 +527,8 @@ struct SurfaceParams {              return PixelFormat::BC6H_SF16;          case Tegra::Texture::TextureFormat::ASTC_2D_4X4:              return PixelFormat::ASTC_2D_4X4; +        case Tegra::Texture::TextureFormat::ASTC_2D_8X8: +            return PixelFormat::ASTC_2D_8X8;          case Tegra::Texture::TextureFormat::R16_G16:              switch (component_type) {              case Tegra::Texture::ComponentType::FLOAT: @@ -576,6 +583,7 @@ struct SurfaceParams {          case Tegra::RenderTargetFormat::RG16_UNORM:          case Tegra::RenderTargetFormat::R16_UNORM:          case Tegra::RenderTargetFormat::B5G6R5_UNORM: +        case Tegra::RenderTargetFormat::BGR5A1_UNORM:          case Tegra::RenderTargetFormat::RG8_UNORM:          case Tegra::RenderTargetFormat::RGBA16_UNORM:              return ComponentType::UNorm; @@ -636,16 +644,18 @@ struct SurfaceParams {      }     
 static SurfaceType GetFormatType(PixelFormat pixel_format) { -        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) { +        if (static_cast<std::size_t>(pixel_format) < +            static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {              return SurfaceType::ColorTexture;          } -        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) { +        if (static_cast<std::size_t>(pixel_format) < +            static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {              return SurfaceType::Depth;          } -        if (static_cast<size_t>(pixel_format) < -            static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { +        if (static_cast<std::size_t>(pixel_format) < +            static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {              return SurfaceType::DepthStencil;          } @@ -659,7 +669,7 @@ struct SurfaceParams {      MathUtil::Rectangle<u32> GetRect() const;      /// Returns the size of this surface in bytes, adjusted for compression -    size_t SizeInBytes() const { +    std::size_t SizeInBytes() const {          const u32 compression_factor{GetCompressionFactor(pixel_format)};          ASSERT(width % compression_factor == 0);          ASSERT(height % compression_factor == 0); @@ -671,7 +681,7 @@ struct SurfaceParams {      static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);      /// Creates SurfaceParams from a framebuffer configuration -    static SurfaceParams CreateForFramebuffer(size_t index); +    static SurfaceParams CreateForFramebuffer(std::size_t index);      /// Creates SurfaceParams for a depth buffer configuration      static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, @@ -694,7 +704,7 @@ struct SurfaceParams {      u32 height;      u32 depth;      u32 unaligned_height; -    size_t size_in_bytes; +    std::size_t size_in_bytes;      SurfaceTarget 
target;  }; @@ -711,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {  namespace std {  template <>  struct hash<SurfaceReserveKey> { -    size_t operator()(const SurfaceReserveKey& k) const { +    std::size_t operator()(const SurfaceReserveKey& k) const {          return k.Hash();      }  }; @@ -727,7 +737,7 @@ public:          return params.addr;      } -    size_t GetSizeInBytes() const { +    std::size_t GetSizeInBytes() const {          return params.size_in_bytes;      } @@ -775,7 +785,7 @@ public:      Surface GetDepthBufferSurface(bool preserve_contents);      /// Get the color surface based on the framebuffer configuration and the specified render target -    Surface GetColorBufferSurface(size_t index, bool preserve_contents); +    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);      /// Flushes the surface to Switch memory      void FlushSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 61080f5cc..894fe6eae 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -14,7 +14,7 @@ namespace OpenGL {  /// Gets the address for the specified shader stage program  static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {      const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); -    const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; +    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];      return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +                                                 shader_config.offset);  } @@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) {  /// Helper function to set shader uniform block bindings for a single shader stage  static void 
SetShaderUniformBlockBinding(GLuint shader, const char* name, -                                         Maxwell::ShaderStage binding, size_t expected_size) { +                                         Maxwell::ShaderStage binding, std::size_t expected_size) {      const GLuint ub_index = glGetUniformBlockIndex(shader, name);      if (ub_index == GL_INVALID_INDEX) {          return; @@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name,      GLint ub_size = 0;      glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); -    ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size, +    ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,                 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);      glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));  } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6e6febcbc..9bafe43a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -28,7 +28,7 @@ public:      }      /// Gets the size of the shader in guest memory, required for cache management -    size_t GetSizeInBytes() const { +    std::size_t GetSizeInBytes() const {          return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);      } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7a5321b9c..00cd05e62 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -12,6 +12,7 @@  #include "common/assert.h"  #include "common/common_types.h"  #include "video_core/engines/shader_bytecode.h" +#include "video_core/engines/shader_header.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_shader_decompiler.h" 
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;  using Tegra::Shader::SubOp;  constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -constexpr u32 PROGRAM_HEADER_SIZE = 0x50; +constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);  class DecompileFail : public std::runtime_error {  public: @@ -189,7 +190,7 @@ public:  private:      void AppendIndentation() { -        shader_source.append(static_cast<size_t>(scope) * 4, ' '); +        shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');      }      std::string shader_source; @@ -208,7 +209,7 @@ public:          UnsignedInteger,      }; -    GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} +    GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}      /// Gets the GLSL type string for a register      static std::string GetTypeString() { @@ -226,15 +227,23 @@ public:      }      /// Returns the index of the register -    size_t GetIndex() const { +    std::size_t GetIndex() const {          return index;      }  private: -    const size_t index; +    const std::size_t index;      const std::string& suffix;  }; +enum class InternalFlag : u64 { +    ZeroFlag = 0, +    CarryFlag = 1, +    OverflowFlag = 2, +    NaNFlag = 3, +    Amount +}; +  /**   * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state   * of all registers (e.g. 
whether they are currently being used as Floats or Integers), and @@ -328,13 +337,19 @@ public:      void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,                                const std::string& value, u64 dest_num_components,                                u64 value_num_components, bool is_saturated = false, -                              u64 dest_elem = 0, Register::Size size = Register::Size::Word) { +                              u64 dest_elem = 0, Register::Size size = Register::Size::Word, +                              bool sets_cc = false) {          ASSERT_MSG(!is_saturated, "Unimplemented");          const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};          SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',                      dest_num_components, value_num_components, dest_elem); + +        if (sets_cc) { +            const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; +            SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); +        }      }      /** @@ -351,6 +366,26 @@ public:          shader.AddLine(dest + " = " + src + ';');      } +    std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { +        switch (cc) { +        case Tegra::Shader::ControlCode::NEU: +            return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; +        default: +            LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc)); +            UNREACHABLE(); +            return "false"; +        } +    } + +    std::string GetInternalFlag(const InternalFlag ii) const { +        const u32 code = static_cast<u32>(ii); +        return "internalFlag_" + std::to_string(code) + suffix; +    } + +    void SetInternalFlag(const InternalFlag ii, const std::string& value) const { +        shader.AddLine(GetInternalFlag(ii) + " = " + value + ';'); +    } +      /**       * Writes code that does a output 
attribute assignment to register operation. Output attributes       * are stored as floats, so this may require conversion. @@ -414,6 +449,12 @@ public:          }          declarations.AddNewLine(); +        for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { +            const InternalFlag code = static_cast<InternalFlag>(ii); +            declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); +        } +        declarations.AddNewLine(); +          for (const auto element : declr_input_attribute) {              // TODO(bunnei): Use proper number of elements for these              u32 idx = @@ -468,7 +509,7 @@ public:      /// necessary.      std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,                                bool is_array) { -        const size_t offset = static_cast<size_t>(sampler.index.Value()); +        const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());          // If this sampler has already been used, return the existing mapping.          
const auto itr = @@ -481,7 +522,7 @@ public:          }          // Otherwise create a new mapping for this sampler -        const size_t next_index = used_samplers.size(); +        const std::size_t next_index = used_samplers.size();          const SamplerEntry entry{stage, offset, next_index, type, is_array};          used_samplers.emplace_back(entry);          return entry.GetName(); @@ -531,7 +572,7 @@ private:      void BuildRegisterList() {          regs.reserve(Register::NumRegisters); -        for (size_t index = 0; index < Register::NumRegisters; ++index) { +        for (std::size_t index = 0; index < Register::NumRegisters; ++index) {              regs.emplace_back(index, suffix);          }      } @@ -674,7 +715,7 @@ public:                    u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)          : subroutines(subroutines), program_code(program_code), main_offset(main_offset),            stage(stage), suffix(suffix) { - +        std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));          Generate(suffix);      } @@ -688,23 +729,6 @@ public:      }  private: -    // Shader program header for a Fragment Shader. -    struct FragmentHeader { -        INSERT_PADDING_WORDS(5); -        INSERT_PADDING_WORDS(13); -        u32 enabled_color_outputs; -        union { -            BitField<0, 1, u32> writes_samplemask; -            BitField<1, 1, u32> writes_depth; -        }; - -        bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { -            const u32 bit = render_target * 4 + component; -            return enabled_color_outputs & (1 << bit); -        } -    }; -    static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); -      /// Gets the Subroutine object corresponding to the specified address.      
const Subroutine& GetSubroutine(u32 begin, u32 end) const {          const auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -862,7 +886,7 @@ private:       */      bool IsSchedInstruction(u32 offset) const {          // sched instructions appear once every 4 instructions. -        static constexpr size_t SchedPeriod = 4; +        static constexpr std::size_t SchedPeriod = 4;          u32 absolute_offset = offset - main_offset;          return (absolute_offset % SchedPeriod) == 0; @@ -930,7 +954,7 @@ private:          std::string result;          result += '('; -        for (size_t i = 0; i < shift_amounts.size(); ++i) { +        for (std::size_t i = 0; i < shift_amounts.size(); ++i) {              if (i)                  result += '|';              result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + @@ -954,9 +978,7 @@ private:          // TEXS has two destination registers and a swizzle. The first two elements in the swizzle          // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 -        ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented"); - -        size_t written_components = 0; +        std::size_t written_components = 0;          for (u32 component = 0; component < 4; ++component) {              if (!instr.texs.IsComponentEnabled(component)) {                  continue; @@ -1010,10 +1032,8 @@ private:      /// Writes the output values from a fragment shader to the corresponding GLSL output variables.      
void EmitFragmentOutputsWrite() {          ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); -        FragmentHeader header; -        std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); -        ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); +        ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");          // Write the color outputs using the data in the shader registers, disabled          // rendertargets/components are skipped in the register assignment. @@ -1022,7 +1042,7 @@ private:               ++render_target) {              // TODO(Subv): Figure out how dual-source blending is configured in the Switch.              for (u32 component = 0; component < 4; ++component) { -                if (header.IsColorComponentOutputEnabled(render_target, component)) { +                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {                      shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,                                                 regs.GetRegisterAsFloat(current_reg)));                      ++current_reg; @@ -1030,7 +1050,7 @@ private:              }          } -        if (header.writes_depth) { +        if (header.ps.omap.depth) {              // The depth output is always 2 registers after the last color output, and current_reg              // already contains one past the last color register. 
@@ -1510,8 +1530,6 @@ private:              case OpCode::Id::LEA_IMM:              case OpCode::Id::LEA_RZ:              case OpCode::Id::LEA_HI: { -                std::string op_a; -                std::string op_b;                  std::string op_c;                  switch (opcode->GetId()) { @@ -1642,7 +1660,8 @@ private:                  }                  regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, -                                          1, instr.alu.saturate_d, 0, instr.conversion.dest_size); +                                          1, instr.alu.saturate_d, 0, instr.conversion.dest_size, +                                          instr.generates_cc.Value() != 0);                  break;              }              case OpCode::Id::I2F_R: @@ -1781,8 +1800,8 @@ private:                  Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,                                                    Tegra::Shader::IpaSampleMode::Default}; -                u32 next_element = instr.attribute.fmt20.element; -                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); +                u64 next_element = instr.attribute.fmt20.element; +                u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());                  const auto LoadNextElement = [&](u32 reg_offset) {                      regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, @@ -1846,8 +1865,8 @@ private:                  ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,                             "Unaligned attribute loads are not supported"); -                u32 next_element = instr.attribute.fmt20.element; -                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); +                u64 next_element = instr.attribute.fmt20.element; +                u64 next_index = 
static_cast<u64>(instr.attribute.fmt20.index.Value());                  const auto StoreNextElement = [&](u32 reg_offset) {                      regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), @@ -1873,6 +1892,13 @@ private:                  Tegra::Shader::TextureType texture_type{instr.tex.texture_type};                  std::string coord; +                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), +                           "AOFFI is not implemented"); +                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), +                           "DC is not implemented"); +                  switch (texture_type) {                  case Tegra::Shader::TextureType::Texture1D: {                      const std::string x = regs.GetRegisterAsFloat(instr.gpr8); @@ -1937,8 +1963,8 @@ private:                      UNREACHABLE();                  }                  } -                size_t dest_elem{}; -                for (size_t elem = 0; elem < 4; ++elem) { +                std::size_t dest_elem{}; +                for (std::size_t elem = 0; elem < 4; ++elem) {                      if (!instr.tex.IsComponentEnabled(elem)) {                          // Skip disabled components                          continue; @@ -1955,6 +1981,11 @@ private:                  Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};                  bool is_array{instr.texs.IsArrayTexture()}; +                ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), +                           "DC is not implemented"); +                  switch (texture_type) {                  case 
Tegra::Shader::TextureType::Texture2D: {                      if (is_array) { @@ -1990,6 +2021,13 @@ private:                  std::string coord;                  const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};                  const bool is_array{instr.tlds.IsArrayTexture()}; +  +                ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), +                           "AOFFI is not implemented"); +                ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), +                           "MZ is not implemented");                  switch (texture_type) {                  case Tegra::Shader::TextureType::Texture1D: { @@ -2024,6 +2062,17 @@ private:                  ASSERT(instr.tld4.array == 0);                  std::string coord; +                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), +                           "AOFFI is not implemented"); +                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), +                           "DC is not implemented"); +                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), +                           "NDV is not implemented"); +                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), +                           "PTP is not implemented"); +                  switch (instr.tld4.texture_type) {                  case Tegra::Shader::TextureType::Texture2D: {                      const std::string x = regs.GetRegisterAsFloat(instr.gpr8); @@ -2047,8 +2096,8 @@ private:                  const std::string texture = "textureGather(" 
+ sampler + ", coords, " +                                              std::to_string(instr.tld4.component) + ')'; -                size_t dest_elem{}; -                for (size_t elem = 0; elem < 4; ++elem) { +                std::size_t dest_elem{}; +                for (std::size_t elem = 0; elem < 4; ++elem) {                      if (!instr.tex.IsComponentEnabled(elem)) {                          // Skip disabled components                          continue; @@ -2061,6 +2110,13 @@ private:                  break;              }              case OpCode::Id::TLD4S: { +                ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), +                           "AOFFI is not implemented"); +                ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), +                           "DC is not implemented"); +                  const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);                  const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);                  // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. @@ -2073,6 +2129,9 @@ private:                  break;              }              case OpCode::Id::TXQ: { +                ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                  // TODO: the new commits on the texture refactor, change the way samplers work.                  // Sadly, not all texture instructions specify the type of texture their sampler                  // uses. This must be fixed at a later instance. 
@@ -2093,6 +2152,11 @@ private:                  break;              }              case OpCode::Id::TMML: { +                ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), +                           "NODEP is not implemented"); +                ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), +                           "NDV is not implemented"); +                  const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);                  const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);                  const bool is_array = instr.tmml.array != 0; @@ -2259,31 +2323,55 @@ private:              break;          }          case OpCode::Type::PredicateSetPredicate: { -            const std::string op_a = -                GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); -            const std::string op_b = -                GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); +            switch (opcode->GetId()) { +            case OpCode::Id::PSETP: { +                const std::string op_a = +                    GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); +                const std::string op_b = +                    GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); -            // We can't use the constant predicate as destination. -            ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); +                // We can't use the constant predicate as destination. 
+                ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); -            const std::string second_pred = -                GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); +                const std::string second_pred = +                    GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); -            const std::string combiner = GetPredicateCombiner(instr.psetp.op); +                const std::string combiner = GetPredicateCombiner(instr.psetp.op); -            const std::string predicate = -                '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; +                const std::string predicate = +                    '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; -            // Set the primary predicate to the result of Predicate OP SecondPredicate -            SetPredicate(instr.psetp.pred3, -                         '(' + predicate + ") " + combiner + " (" + second_pred + ')'); +                // Set the primary predicate to the result of Predicate OP SecondPredicate +                SetPredicate(instr.psetp.pred3, +                             '(' + predicate + ") " + combiner + " (" + second_pred + ')'); -            if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { -                // Set the secondary predicate to the result of !Predicate OP SecondPredicate, -                // if enabled -                SetPredicate(instr.psetp.pred0, -                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); +                if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { +                    // Set the secondary predicate to the result of !Predicate OP SecondPredicate, +                    // if enabled +                    SetPredicate(instr.psetp.pred0, +                                 "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); +                } 
+                break; +            } +            case OpCode::Id::CSETP: { +                const std::string pred = +                    GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); +                const std::string combiner = GetPredicateCombiner(instr.csetp.op); +                const std::string controlCode = regs.GetControlCode(instr.csetp.cc); +                if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { +                    SetPredicate(instr.csetp.pred3, +                                 '(' + controlCode + ") " + combiner + " (" + pred + ')'); +                } +                if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { +                    SetPredicate(instr.csetp.pred0, +                                 "!(" + controlCode + ") " + combiner + " (" + pred + ')'); +                } +                break; +            } +            default: { +                LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName()); +                UNREACHABLE(); +            }              }              break;          } @@ -2673,6 +2761,7 @@ private:  private:      const std::set<Subroutine>& subroutines;      const ProgramCode& program_code; +    Tegra::Shader::Header header;      const u32 main_offset;      Maxwell3D::Regs::ShaderStage stage;      const std::string& suffix; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index a43e2997b..d53b93ad5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -13,7 +13,7 @@  namespace OpenGL::GLShader { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; +constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};  using ProgramCode = std::vector<u64>;  class ConstBufferEntry { @@ -51,7 +51,7 @@ public:      }      std::string GetName() const { -        return BufferBaseNames[static_cast<size_t>(stage)] + 
std::to_string(index); +        return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);      }      u32 GetHash() const { @@ -74,15 +74,15 @@ class SamplerEntry {      using Maxwell = Tegra::Engines::Maxwell3D::Regs;  public: -    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, +    SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,                   Tegra::Shader::TextureType type, bool is_array)          : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} -    size_t GetOffset() const { +    std::size_t GetOffset() const {          return offset;      } -    size_t GetIndex() const { +    std::size_t GetIndex() const {          return sampler_index;      } @@ -91,7 +91,7 @@ public:      }      std::string GetName() const { -        return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' + +        return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +                 std::to_string(sampler_index);      } @@ -133,7 +133,7 @@ public:      }      static std::string GetArrayName(Maxwell::ShaderStage stage) { -        return TextureSamplerNames[static_cast<size_t>(stage)]; +        return TextureSamplerNames[static_cast<std::size_t>(stage)];      }  private: @@ -143,9 +143,9 @@ private:      /// Offset in TSC memory from which to read the sampler object, as specified by the sampling      /// instruction. -    size_t offset; +    std::size_t offset;      Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used. -    size_t sampler_index;            ///< Value used to index into the generated GLSL sampler array. +    std::size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.      Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)      bool is_array; ///< Whether the texture is being sampled as an array texture or not. 
 }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 533e42caa..b86cd96e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,7 +12,7 @@  namespace OpenGL::GLShader {  /// Number of OpenGL texture samplers that can be used in the fragment shader -static constexpr size_t NumTextureSamplers = 32; +static constexpr std::size_t NumTextureSamplers = 32;  using Tegra::Engines::Maxwell3D; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 6f70deb96..af99132ba 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -272,7 +272,7 @@ void OpenGLState::Apply() const {      }      // Clip distance -    for (size_t i = 0; i < clip_distance.size(); ++i) { +    for (std::size_t i = 0; i < clip_distance.size(); ++i) {          if (clip_distance[i] != cur_state.clip_distance[i]) {              if (clip_distance[i]) {                  glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index aadf68f16..664f3ca20 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a      mapped_size = size;      if (alignment > 0) { -        buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); +        buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);      }      bool invalidate = false; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 272294c62..20ba6d4f6 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -46,6 +46,48 @@ void CopySwizzledData(u32 
width, u32 height, u32 bytes_per_pixel, u32 out_bytes_      }  } +template <std::size_t N, std::size_t M> +struct alignas(64) SwizzleTable { +    constexpr SwizzleTable() { +        for (u32 y = 0; y < N; ++y) { +            for (u32 x = 0; x < M; ++x) { +                const u32 x2 = x * 16; +                values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + +                                                ((x2 % 32) / 16) * 32 + (y % 2) * 16); +            } +        } +    } +    const std::array<u16, M>& operator[](std::size_t index) const { +        return values[index]; +    } +    std::array<std::array<u16, M>, N> values{}; +}; + +constexpr auto swizzle_table = SwizzleTable<8, 4>(); + +void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data, +                     u8* unswizzled_data, bool unswizzle, u32 block_height) { +    std::array<u8*, 2> data_ptrs; +    const std::size_t stride{width * bytes_per_pixel}; +    const std::size_t image_width_in_gobs{(stride + 63) / 64}; +    const std::size_t copy_size{16}; +    for (std::size_t y = 0; y < height; ++y) { +        const std::size_t initial_gob = +            (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs + +            (y % (8 * block_height) / 8) * 512; +        const std::size_t pixel_base{y * width * bytes_per_pixel}; +        const auto& table = swizzle_table[y % 8]; +        for (std::size_t xb = 0; xb < stride; xb += copy_size) { +            const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height}; +            const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; +            const std::size_t pixel_index{xb + pixel_base}; +            data_ptrs[unswizzle] = swizzled_data + swizzle_offset; +            data_ptrs[!unswizzle] = unswizzled_data + pixel_index; +            std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); +        } +    } +} +  u32 BytesPerPixel(TextureFormat format) {      
switch (format) {      case TextureFormat::DXT1: @@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) {      case TextureFormat::R32_G32_B32:          return 12;      case TextureFormat::ASTC_2D_4X4: +    case TextureFormat::ASTC_2D_8X8:      case TextureFormat::A8R8G8B8:      case TextureFormat::A2B10G10R10:      case TextureFormat::BF10GF11RF11: @@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) {  std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,                                   u32 height, u32 block_height) {      std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); -    CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, -                     Memory::GetPointer(address), unswizzled_data.data(), true, block_height); +    if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { +        FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel, +                        Memory::GetPointer(address), unswizzled_data.data(), true, block_height); +    } else { +        CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, +                         Memory::GetPointer(address), unswizzled_data.data(), true, block_height); +    }      return unswizzled_data;  } @@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat      case TextureFormat::BC6H_UF16:      case TextureFormat::BC6H_SF16:      case TextureFormat::ASTC_2D_4X4: +    case TextureFormat::ASTC_2D_8X8:      case TextureFormat::A8R8G8B8:      case TextureFormat::A2B10G10R10:      case TextureFormat::A1B5G5R5:  | 
