From e926757c8f7d8d1fc90e62c2b891a65fe1119173 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 10 Sep 2018 21:00:55 -0400 Subject: Implemented (Partialy) Shader Header --- src/video_core/CMakeLists.txt | 1 + src/video_core/engines/shader_header.h | 97 ++++++++++++++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 6 +- 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 src/video_core/engines/shader_header.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 65b5f57c3..ac4ec36de 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -12,6 +12,7 @@ add_library(video_core STATIC engines/maxwell_dma.cpp engines/maxwell_dma.h engines/shader_bytecode.h + engines/shader_header.h gpu.cpp gpu.h macro_interpreter.cpp diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h new file mode 100644 index 000000000..4663377ed --- /dev/null +++ b/src/video_core/engines/shader_header.h @@ -0,0 +1,97 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "common/common_funcs.h" + +namespace Tegra::Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture +struct Header { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 5, u32> reserved; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<24, 4, u32> reserved; + BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_PADDING_BYTES(16); // ImapGenericVector[32] + INSERT_PADDING_BYTES(2); // ImapColor + INSERT_PADDING_BYTES(2); // ImapSystemValuesC + INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES(1); // ImapReserved + INSERT_PADDING_BYTES(3); // OmapSystemValuesA + INSERT_PADDING_BYTES(1); // OmapSystemValuesB + INSERT_PADDING_BYTES(16); // OmapGenericVector[32] + INSERT_PADDING_BYTES(2); // OmapColor + INSERT_PADDING_BYTES(2); // OmapSystemValuesC + INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES(1); // OmapReserved + } ps; + + struct { + INSERT_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_PADDING_BYTES(32); // ImapGenericVector[32] + INSERT_PADDING_BYTES(2); // ImapColor + INSERT_PADDING_BYTES(2); // ImapSystemValuesC + INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES(2); // ImapReserved + INSERT_PADDING_BYTES(4); // OmapTarget[8] + union { + BitField<0, 1, u32> omap_sample_mask; + BitField<1, 1, u32> omap_depth; + BitField<2, 30, u32> omap_reserved; + } omap; + } vtg; + } sph; +}; + +static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); + +} // namespace Tegra::Shader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e350113f1..d826d98eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/engines/shader_header.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -26,7 +27,7 @@ using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -constexpr u32 PROGRAM_HEADER_SIZE = 0x50; +constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); class DecompileFail : public std::runtime_error { public: @@ -674,7 +675,7 @@ public: u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) : subroutines(subroutines), program_code(program_code), main_offset(main_offset), stage(stage), suffix(suffix) { - + std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); Generate(suffix); } @@ -2502,6 +2503,7 @@ private: private: const std::set& subroutines; const ProgramCode& program_code; + Tegra::Shader::Header header; const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; const std::string& suffix; -- cgit v1.2.3 From 2b48cfd44b9923d887314ca2ce8ad09240a997b2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 11 Sep 2018 12:08:06 -0400 Subject: Replace old FragmentHeader for the new Header --- src/video_core/engines/shader_header.h | 24 +++++++++++++-------- .../renderer_opengl/gl_shader_decompiler.cpp | 25 +++------------------- 2 files changed, 18 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index 4663377ed..a885ee3cf 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -5,8 +5,8 @@ #pragma once #include "common/bit_field.h" -#include "common/common_types.h" #include "common/common_funcs.h" +#include "common/common_types.h" namespace Tegra::Shader { @@ -72,7 +72,7 @@ struct Header { INSERT_PADDING_BYTES(2); // OmapSystemValuesC INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES(1); // OmapReserved - } ps; + } vtg; struct { INSERT_PADDING_BYTES(3); // ImapSystemValuesA @@ -82,14 +82,20 @@ struct Header { INSERT_PADDING_BYTES(2); // ImapSystemValuesC INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] INSERT_PADDING_BYTES(2); // ImapReserved - INSERT_PADDING_BYTES(4); // OmapTarget[8] - union { - BitField<0, 1, u32> omap_sample_mask; - BitField<1, 1, u32> omap_depth; - BitField<2, 30, u32> omap_reserved; + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; } omap; - } vtg; - } sph; + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { + const u32 bit = render_target * 4 + component; + return omap.target & (1 << bit); + } + } ps; + }; }; static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d826d98eb..434faf9d4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -689,23 +689,6 @@ public: } private: - // Shader program header for a Fragment Shader. - struct FragmentHeader { - INSERT_PADDING_WORDS(5); - INSERT_PADDING_WORDS(13); - u32 enabled_color_outputs; - union { - BitField<0, 1, u32> writes_samplemask; - BitField<1, 1, u32> writes_depth; - }; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return enabled_color_outputs & (1 << bit); - } - }; - static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); - /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { const auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -1011,10 +994,8 @@ private: /// Writes the output values from a fragment shader to the corresponding GLSL output variables. void EmitFragmentOutputsWrite() { ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); - FragmentHeader header; - std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); - ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); + ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented"); // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. @@ -1023,7 +1004,7 @@ private: ++render_target) { // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { - if (header.IsColorComponentOutputEnabled(render_target, component)) { + if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, regs.GetRegisterAsFloat(current_reg))); ++current_reg; @@ -1031,7 +1012,7 @@ private: } } - if (header.writes_depth) { + if (header.ps.omap.depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. -- cgit v1.2.3 From ae128f0375f5914a7e4818e29c891a927fcf8ded Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 13 Sep 2018 21:53:20 -0400 Subject: gl_shader_decompiler: Get rid of variable shadowing within LEA instructions These variables are already defined within an outer scope. --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2d56370c7..e82c20942 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1510,8 +1510,6 @@ private: case OpCode::Id::LEA_IMM: case OpCode::Id::LEA_RZ: case OpCode::Id::LEA_HI: { - std::string op_a; - std::string op_b; std::string op_c; switch (opcode->GetId()) { -- cgit v1.2.3 From f8e994354f62e823d2a480eb00e0b3e34eff1a65 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 13 Sep 2018 22:58:40 -0400 Subject: Optimized Texture Swizzling --- src/video_core/textures/decoders.cpp | 51 ++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 272294c62..55926cca4 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_ } } +template +struct alignas(64) SwizzleTable { + constexpr SwizzleTable() { + for (u32 y = 0; y < N; ++y) { + for (u32 x = 0; x < M; ++x) { + const u32 x2 = x * 16; + values[y][x] = static_cast(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + + ((x2 % 32) / 16) * 32 + (y % 2) * 16); + } + } + } + const std::array& operator[](std::size_t index) const { + return values[index]; + } + std::array, N> values{}; +}; + +constexpr auto swizzle_table = SwizzleTable<8, 4>(); + +void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data, + u8* unswizzled_data, bool unswizzle, u32 block_height) { + std::array data_ptrs; + const std::size_t stride{width * bytes_per_pixel}; + const std::size_t image_width_in_gobs{(stride + 63) / 64}; + const std::size_t copy_size{16}; + for (std::size_t y = 0; y < height; ++y) { + const std::size_t initial_gob = + (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs + + (y % (8 * block_height) / 8) * 512; + const std::size_t pixel_base{y * width * bytes_per_pixel}; + const auto& table = swizzle_table[y % 8]; + for (std::size_t xb = 0; xb < stride; xb += copy_size) { + const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height}; + const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; + const std::size_t pixel_index{xb + pixel_base}; + data_ptrs[unswizzle] = swizzled_data + swizzle_offset; + data_ptrs[!unswizzle] = unswizzled_data + pixel_index; + std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); + } + } +} + u32 BytesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::DXT1: @@ -91,8 +133,13 @@ u32 BytesPerPixel(TextureFormat format) { std::vector UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, u32 height, u32 block_height) { std::vector unswizzled_data(width * height * bytes_per_pixel); - CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, - Memory::GetPointer(address), unswizzled_data.data(), true, block_height); + if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { + FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel, + Memory::GetPointer(address), unswizzled_data.data(), true, block_height); + } else { + CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, + Memory::GetPointer(address), unswizzled_data.data(), true, block_height); + } return unswizzled_data; } -- cgit v1.2.3 From 63c2e32e207d31ecadd9022e1d7cd705c9febac8 Mon Sep 17 00:00:00 2001 From: fearlessTobi Date: Sat, 15 Sep 2018 15:21:06 +0200 Subject: Port #4182 from Citra: "Prefix all size_t with std::" --- src/video_core/engines/fermi_2d.h | 2 +- src/video_core/engines/maxwell_3d.cpp | 13 ++++--- src/video_core/engines/maxwell_3d.h | 28 +++++++------- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/engines/maxwell_dma.h | 2 +- src/video_core/engines/shader_bytecode.h | 18 ++++----- src/video_core/macro_interpreter.h | 2 +- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 15 ++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 16 ++++---- src/video_core/renderer_opengl/gl_rasterizer.cpp | 30 +++++++-------- src/video_core/renderer_opengl/gl_rasterizer.h | 8 ++-- .../renderer_opengl/gl_rasterizer_cache.cpp | 43 +++++++++++----------- .../renderer_opengl/gl_rasterizer_cache.h | 32 ++++++++-------- src/video_core/renderer_opengl/gl_shader_cache.cpp | 6 +-- src/video_core/renderer_opengl/gl_shader_cache.h | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 28 +++++++------- src/video_core/renderer_opengl/gl_shader_gen.h | 18 ++++----- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- src/video_core/renderer_opengl/gl_state.cpp | 2 +- .../renderer_opengl/gl_stream_buffer.cpp | 2 +- 20 files changed, 138 insertions(+), 133 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index dcf9ef8b9..021b83eaa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -26,7 +26,7 @@ public: void WriteReg(u32 method, u32 value); struct Regs { - static constexpr size_t NUM_REGS = 0x258; + static constexpr std::size_t NUM_REGS = 0x258; struct Surface { RenderTargetFormat format; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 329079ddd..8afd26fe9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() { void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. - auto& shader = state.shader_stages[static_cast(stage)]; - auto& bind_data = regs.cb_bind[static_cast(stage)]; + auto& shader = state.shader_stages[static_cast(stage)]; + auto& bind_data = regs.cb_bind[static_cast(stage)]; auto& buffer = shader.const_buffers[bind_data.index]; @@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { std::vector Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { std::vector textures; - auto& fragment_shader = state.shader_stages[static_cast(stage)]; + auto& fragment_shader = state.shader_stages[static_cast(stage)]; auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; // Offset into the texture constbuffer where the texture info begins. - static constexpr size_t TextureInfoOffset = 0x20; + static constexpr std::size_t TextureInfoOffset = 0x20; for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { @@ -360,8 +360,9 @@ std::vector Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const { - auto& shader = state.shader_stages[static_cast(stage)]; +Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, + std::size_t offset) const { + auto& shader = state.shader_stages[static_cast(stage)]; auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d3be900a4..b81b0723d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -34,17 +34,17 @@ public: /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. struct Regs { - static constexpr size_t NUM_REGS = 0xE00; - - static constexpr size_t NumRenderTargets = 8; - static constexpr size_t NumViewports = 16; - static constexpr size_t NumCBData = 16; - static constexpr size_t NumVertexArrays = 32; - static constexpr size_t NumVertexAttributes = 32; - static constexpr size_t MaxShaderProgram = 6; - static constexpr size_t MaxShaderStage = 5; + static constexpr std::size_t NUM_REGS = 0xE00; + + static constexpr std::size_t NumRenderTargets = 8; + static constexpr std::size_t NumViewports = 16; + static constexpr std::size_t NumCBData = 16; + static constexpr std::size_t NumVertexArrays = 32; + static constexpr std::size_t NumVertexAttributes = 32; + static constexpr std::size_t MaxShaderProgram = 6; + static constexpr std::size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. - static constexpr size_t MaxConstBuffers = 18; + static constexpr std::size_t MaxConstBuffers = 18; enum class QueryMode : u32 { Write = 0, @@ -443,9 +443,9 @@ public: } }; - bool IsShaderConfigEnabled(size_t index) const { + bool IsShaderConfigEnabled(std::size_t index) const { // The VertexB is always enabled. - if (index == static_cast(Regs::ShaderProgram::VertexB)) { + if (index == static_cast(Regs::ShaderProgram::VertexB)) { return true; } return shader_config[index].enable != 0; @@ -571,7 +571,7 @@ public: BitField<25, 3, u32> map_7; }; - u32 GetMap(size_t index) const { + u32 GetMap(std::size_t index) const { const std::array maps{map_0, map_1, map_2, map_3, map_4, map_5, map_6, map_7}; ASSERT(index < maps.size()); @@ -925,7 +925,7 @@ public: std::vector GetStageTextures(Regs::ShaderStage stage) const; /// Returns the texture information for a specific texture in a specific shader stage. - Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; + Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; private: VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c24d33d5c..aa7481b8c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() { ASSERT(regs.dst_params.pos_y == 0); if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { - size_t copy_size = regs.x_count; + std::size_t copy_size = regs.x_count; // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 7882f16e0..311ccb616 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -23,7 +23,7 @@ public: void WriteReg(u32 method, u32 value); struct Regs { - static constexpr size_t NUM_REGS = 0x1D6; + static constexpr std::size_t NUM_REGS = 0x1D6; struct Parameters { union { diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 58f2904ce..d85c5883d 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -20,10 +20,10 @@ namespace Tegra::Shader { struct Register { /// Number of registers - static constexpr size_t NumRegisters = 256; + static constexpr std::size_t NumRegisters = 256; /// Register 255 is special cased to always be 0 - static constexpr size_t ZeroIndex = 255; + static constexpr std::size_t ZeroIndex = 255; enum class Size : u64 { Byte = 0, @@ -584,7 +584,7 @@ union Instruction { BitField<31, 4, u64> component_mask; BitField<55, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(size_t component) const { + bool IsComponentEnabled(std::size_t component) const { return ((1ull << component) & component_mask) != 0; } } tex; @@ -599,7 +599,7 @@ union Instruction { BitField<29, 2, TextureType> texture_type; BitField<31, 4, u64> component_mask; - bool IsComponentEnabled(size_t component) const { + bool IsComponentEnabled(std::size_t component) const { return ((1ull << component) & component_mask) != 0; } } tmml; @@ -646,7 +646,7 @@ union Instruction { return gpr28.Value() != Register::ZeroIndex; } - bool IsComponentEnabled(size_t component) const { + bool IsComponentEnabled(std::size_t component) const { static constexpr std::array, 4> mask_lut{{ {}, {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, @@ -654,7 +654,7 @@ union Instruction { {0x7, 0xb, 0xd, 0xe, 0xf}, }}; - size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; + std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; u32 mask = mask_lut[index][component_mask_selector]; @@ -939,7 +939,7 @@ public: private: struct Detail { private: - static constexpr size_t opcode_bitsize = 16; + static constexpr std::size_t opcode_bitsize = 16; /** * Generates the mask and the expected value after masking from a given bitstring. @@ -948,8 +948,8 @@ private: */ static auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; - for (size_t i = 0; i < opcode_bitsize; i++) { - const size_t bit_position = opcode_bitsize - i - 1; + for (std::size_t i = 0; i < opcode_bitsize; i++) { + const std::size_t bit_position = opcode_bitsize - i - 1; switch (bitstring[i]) { case '0': mask |= 1 << bit_position; diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 7d836b816..cee0baaf3 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -152,7 +152,7 @@ private: boost::optional delayed_pc; ///< Program counter to execute at after the delay slot is executed. - static constexpr size_t NumMacroRegisters = 8; + static constexpr std::size_t NumMacroRegisters = 8; /// General purpose macro registers. std::array registers = {}; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0b5d18bcb..578aca789 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -12,10 +12,10 @@ namespace OpenGL { -OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} +OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} -GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment, - bool cache) { +GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, + std::size_t alignment, bool cache) { auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); const boost::optional cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; @@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz return uploaded_offset; } -GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) { +GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment) { AlignBuffer(alignment); std::memcpy(buffer_ptr, raw_pointer, size); GLintptr uploaded_offset = buffer_offset; @@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, return uploaded_offset; } -void OGLBufferCache::Map(size_t max_size) { +void OGLBufferCache::Map(std::size_t max_size) { bool invalidate; std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(static_cast(max_size), 4); @@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const { return stream_buffer.GetHandle(); } -void OGLBufferCache::AlignBuffer(size_t alignment) { +void OGLBufferCache::AlignBuffer(std::size_t alignment) { // Align the offset, not the mapped pointer GLintptr offset_aligned = - static_cast(Common::AlignUp(static_cast(buffer_offset), alignment)); + static_cast(Common::AlignUp(static_cast(buffer_offset), alignment)); buffer_ptr += offset_aligned - buffer_offset; buffer_offset = offset_aligned; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6da862902..6c18461f4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -19,32 +19,32 @@ struct CachedBufferEntry final { return addr; } - size_t GetSizeInBytes() const { + std::size_t GetSizeInBytes() const { return size; } VAddr addr; - size_t size; + std::size_t size; GLintptr offset; - size_t alignment; + std::size_t alignment; }; class OGLBufferCache final : public RasterizerCache> { public: - explicit OGLBufferCache(size_t size); + explicit OGLBufferCache(std::size_t size); - GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4, + GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool cache = true); - GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4); + GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); - void Map(size_t max_size); + void Map(std::size_t max_size); void Unmap(); GLuint GetHandle() const; protected: - void AlignBuffer(size_t alignment); + void AlignBuffer(std::size_t alignment); private: OGLStreamBuffer stream_buffer; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7e1bba67d..274c2dbcf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { // Create sampler objects - for (size_t i = 0; i < texture_samplers.size(); ++i) { + for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } @@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() { u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; u32 current_texture_bindpoint = 0; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast(index)}; @@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() { continue; } - const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 + const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); const GLintptr offset = buffer_cache.UploadHostMemory( - &ubo, sizeof(ubo), static_cast(uniform_buffer_alignment)); + &ubo, sizeof(ubo), static_cast(uniform_buffer_alignment)); // Bind the buffer glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); @@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() { shader_program_manager->UseTrivialGeometryShader(); } -size_t RasterizerOpenGL::CalculateVertexArraysSize() const { +std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - size_t size = 0; + std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { if (!regs.vertex_array[index].IsEnabled()) continue; @@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, bool preserve_contents, - boost::optional single_color_target) { + boost::optional single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; @@ -330,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep } else { // Multiple color attachments are enabled std::array buffers; - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); glFramebufferTexture2D( @@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep } } else { // No color attachments are enabled - zero out all of them - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast(index), GL_TEXTURE_2D, 0, 0); @@ -462,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() { state.draw.vertex_buffer = buffer_cache.GetHandle(); state.Apply(); - size_t buffer_size = CalculateVertexArraysSize(); + std::size_t buffer_size = CalculateVertexArraysSize(); if (is_indexed) { - buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; + buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; } // Uniform space for the 5 shader stages buffer_size = - Common::AlignUp(buffer_size, 4) + + Common::AlignUp(buffer_size, 4) + (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; // Add space for at least 18 constant buffers @@ -644,7 +644,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad MICROPROFILE_SCOPE(OpenGL_UBO); const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; + const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; const auto& entries = shader->GetShaderEntries().const_buffer_entries; constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; @@ -667,7 +667,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad continue; } - size_t size = 0; + std::size_t size = 0; if (used_buffer.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing @@ -689,7 +689,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); GLintptr const_buffer_offset = buffer_cache.UploadMemory( - buffer.address, size, static_cast(uniform_buffer_alignment)); + buffer.address, size, static_cast(uniform_buffer_alignment)); // Now configure the bindpoint of the buffer inside the shader glUniformBlockBinding(shader->GetProgramHandle(), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 163412882..bf9560bdc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -73,7 +73,7 @@ public: }; /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; + static constexpr std::size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); @@ -106,7 +106,7 @@ private: */ void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, bool preserve_contents = true, - boost::optional single_color_target = {}); + boost::optional single_color_target = {}); /* * Configures the current constbuffers to use for the draw command. @@ -180,12 +180,12 @@ private: std::array texture_samplers; - static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; + static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; OGLFramebuffer framebuffer; GLint uniform_buffer_alignment; - size_t CalculateVertexArraysSize() const; + std::size_t CalculateVertexArraysSize() const; void SetupVertexArrays(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 32001e44b..3f385484f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { return params; } -/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) { +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; SurfaceParams params{}; params.addr = TryGetCpuAddr(config.Address()); @@ -203,7 +203,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) { } static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { - ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); + ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); auto& format = tex_format_tuples[static_cast(pixel_format)]; ASSERT(component_type == format.component_type); @@ -256,7 +256,7 @@ static bool IsFormatBCn(PixelFormat format) { } template -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size, +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size, VAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); @@ -267,7 +267,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; const std::vector data = Tegra::Texture::UnswizzleTexture( addr, tile_size, bytes_per_pixel, stride, height, block_height); - const size_t size_to_copy{std::min(gl_buffer_size, data.size())}; + const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; memcpy(gl_buffer, data.data(), size_to_copy); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should @@ -278,7 +278,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t } } -static constexpr std::array morton_to_gl_fns = { // clang-format off @@ -335,7 +335,7 @@ static constexpr std::array gl_to_morton_fns = { // clang-format off @@ -513,9 +513,9 @@ static void ConvertS8Z24ToZ24S8(std::vector& data, u32 width, u32 height) { S8Z24 input_pixel{}; Z24S8 output_pixel{}; constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; - for (size_t y = 0; y < height; ++y) { - for (size_t x = 0; x < width; ++x) { - const size_t offset{bpp * (y * width + x)}; + for (std::size_t y = 0; y < height; ++y) { + for (std::size_t x = 0; x < width; ++x) { + const std::size_t offset{bpp * (y * width + x)}; std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); output_pixel.s8.Assign(input_pixel.s8); output_pixel.z24.Assign(input_pixel.z24); @@ -526,9 +526,9 @@ static void ConvertS8Z24ToZ24S8(std::vector& data, u32 width, u32 height) { static void ConvertG8R8ToR8G8(std::vector& data, u32 width, u32 height) { constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; - for (size_t y = 0; y < height; ++y) { - for (size_t x = 0; x < width; ++x) { - const size_t offset{bpp * (y * width + x)}; + for (std::size_t y = 0; y < height; ++y) { + for (std::size_t x = 0; x < width; ++x) { + const std::size_t offset{bpp * (y * width + x)}; const u8 temp{data[offset]}; data[offset] = data[offset + 1]; data[offset + 1] = temp; @@ -591,13 +591,13 @@ void CachedSurface::LoadGLBuffer() { UNREACHABLE(); } - gl_buffer.resize(static_cast(params.depth) * copy_size); - morton_to_gl_fns[static_cast(params.pixel_format)]( + gl_buffer.resize(static_cast(params.depth) * copy_size); + morton_to_gl_fns[static_cast(params.pixel_format)]( params.width, params.block_height, params.height, gl_buffer.data(), copy_size, params.addr); } else { const u8* const texture_src_data_end{texture_src_data + - (static_cast(params.depth) * copy_size)}; + (static_cast(params.depth) * copy_size)}; gl_buffer.assign(texture_src_data, texture_src_data_end); } @@ -616,7 +616,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer.size() == static_cast(params.width) * params.height * + ASSERT(gl_buffer.size() == static_cast(params.width) * params.height * GetGLBytesPerPixel(params.pixel_format) * params.depth); const auto& rect{params.GetRect()}; @@ -624,8 +624,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle // Load data from memory to the surface const GLint x0 = static_cast(rect.left); const GLint y0 = static_cast(rect.bottom); - const size_t buffer_offset = - static_cast(static_cast(y0) * params.width + static_cast(x0)) * + const std::size_t buffer_offset = + static_cast(static_cast(y0) * params.width + + static_cast(x0)) * GetGLBytesPerPixel(params.pixel_format); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); @@ -727,7 +728,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { return GetSurface(depth_params, preserve_contents); } -Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) { +Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); @@ -825,7 +826,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, auto source_format = GetFormatTuple(params.pixel_format, params.component_type); auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); - size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); + std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); @@ -849,7 +850,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " "reinterpretation but the texture is tiled."); } - size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); + std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); std::vector data(remaining_size); Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 57ea8593b..aafac9a20 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -90,7 +90,7 @@ struct SurfaceParams { Invalid = 255, }; - static constexpr size_t MaxPixelFormat = static_cast(PixelFormat::Max); + static constexpr std::size_t MaxPixelFormat = static_cast(PixelFormat::Max); enum class ComponentType { Invalid = 0, @@ -199,8 +199,8 @@ struct SurfaceParams { 1, // Z32FS8 }}; - ASSERT(static_cast(format) < compression_factor_table.size()); - return compression_factor_table[static_cast(format)]; + ASSERT(static_cast(format) < compression_factor_table.size()); + return compression_factor_table[static_cast(format)]; } static constexpr u32 GetFormatBpp(PixelFormat format) { @@ -260,8 +260,8 @@ struct SurfaceParams { 64, // Z32FS8 }}; - ASSERT(static_cast(format) < bpp_table.size()); - return bpp_table[static_cast(format)]; + ASSERT(static_cast(format) < bpp_table.size()); + return bpp_table[static_cast(format)]; } u32 GetFormatBpp() const { @@ -636,16 +636,18 @@ struct SurfaceParams { } static SurfaceType GetFormatType(PixelFormat pixel_format) { - if (static_cast(pixel_format) < static_cast(PixelFormat::MaxColorFormat)) { + if (static_cast(pixel_format) < + static_cast(PixelFormat::MaxColorFormat)) { return SurfaceType::ColorTexture; } - if (static_cast(pixel_format) < static_cast(PixelFormat::MaxDepthFormat)) { + if (static_cast(pixel_format) < + static_cast(PixelFormat::MaxDepthFormat)) { return SurfaceType::Depth; } - if (static_cast(pixel_format) < - static_cast(PixelFormat::MaxDepthStencilFormat)) { + if (static_cast(pixel_format) < + static_cast(PixelFormat::MaxDepthStencilFormat)) { return SurfaceType::DepthStencil; } @@ -659,7 +661,7 @@ struct SurfaceParams { MathUtil::Rectangle GetRect() const; /// Returns the size of this surface in bytes, adjusted for compression - size_t SizeInBytes() const { + std::size_t SizeInBytes() const { const u32 compression_factor{GetCompressionFactor(pixel_format)}; ASSERT(width % compression_factor == 0); ASSERT(height % compression_factor == 0); @@ -671,7 +673,7 @@ struct SurfaceParams { static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); /// Creates SurfaceParams from a framebuffer configuration - static SurfaceParams CreateForFramebuffer(size_t index); + static SurfaceParams CreateForFramebuffer(std::size_t index); /// Creates SurfaceParams for a depth buffer configuration static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, @@ -694,7 +696,7 @@ struct SurfaceParams { u32 height; u32 depth; u32 unaligned_height; - size_t size_in_bytes; + std::size_t size_in_bytes; SurfaceTarget target; }; @@ -711,7 +713,7 @@ struct SurfaceReserveKey : Common::HashableStruct { namespace std { template <> struct hash { - size_t operator()(const SurfaceReserveKey& k) const { + std::size_t operator()(const SurfaceReserveKey& k) const { return k.Hash(); } }; @@ -727,7 +729,7 @@ public: return params.addr; } - size_t GetSizeInBytes() const { + std::size_t GetSizeInBytes() const { return params.size_in_bytes; } @@ -775,7 +777,7 @@ public: Surface GetDepthBufferSurface(bool preserve_contents); /// Get the color surface based on the framebuffer configuration and the specified render target - Surface GetColorBufferSurface(size_t index, bool preserve_contents); + Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 61080f5cc..894fe6eae 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -14,7 +14,7 @@ namespace OpenGL { /// Gets the address for the specified shader stage program static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - const auto& shader_config = gpu.regs.shader_config[static_cast(program)]; + const auto& shader_config = gpu.regs.shader_config[static_cast(program)]; return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + shader_config.offset); } @@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) { /// Helper function to set shader uniform block bindings for a single shader stage static void SetShaderUniformBlockBinding(GLuint shader, const char* name, - Maxwell::ShaderStage binding, size_t expected_size) { + Maxwell::ShaderStage binding, std::size_t expected_size) { const GLuint ub_index = glGetUniformBlockIndex(shader, name); if (ub_index == GL_INVALID_INDEX) { return; @@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name, GLint ub_size = 0; glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(static_cast(ub_size) == expected_size, + ASSERT_MSG(static_cast(ub_size) == expected_size, "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); glUniformBlockBinding(shader, ub_index, static_cast(binding)); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6e6febcbc..9bafe43a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -28,7 +28,7 @@ public: } /// Gets the size of the shader in guest memory, required for cache management - size_t GetSizeInBytes() const { + std::size_t GetSizeInBytes() const { return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2d56370c7..d58a65935 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -189,7 +189,7 @@ public: private: void AppendIndentation() { - shader_source.append(static_cast(scope) * 4, ' '); + shader_source.append(static_cast(scope) * 4, ' '); } std::string shader_source; @@ -208,7 +208,7 @@ public: UnsignedInteger, }; - GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} + GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} /// Gets the GLSL type string for a register static std::string GetTypeString() { @@ -226,12 +226,12 @@ public: } /// Returns the index of the register - size_t GetIndex() const { + std::size_t GetIndex() const { return index; } private: - const size_t index; + const std::size_t index; const std::string& suffix; }; @@ -468,7 +468,7 @@ public: /// necessary. std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) { - const size_t offset = static_cast(sampler.index.Value()); + const std::size_t offset = static_cast(sampler.index.Value()); // If this sampler has already been used, return the existing mapping. const auto itr = @@ -481,7 +481,7 @@ public: } // Otherwise create a new mapping for this sampler - const size_t next_index = used_samplers.size(); + const std::size_t next_index = used_samplers.size(); const SamplerEntry entry{stage, offset, next_index, type, is_array}; used_samplers.emplace_back(entry); return entry.GetName(); @@ -531,7 +531,7 @@ private: void BuildRegisterList() { regs.reserve(Register::NumRegisters); - for (size_t index = 0; index < Register::NumRegisters; ++index) { + for (std::size_t index = 0; index < Register::NumRegisters; ++index) { regs.emplace_back(index, suffix); } } @@ -862,7 +862,7 @@ private: */ bool IsSchedInstruction(u32 offset) const { // sched instructions appear once every 4 instructions. - static constexpr size_t SchedPeriod = 4; + static constexpr std::size_t SchedPeriod = 4; u32 absolute_offset = offset - main_offset; return (absolute_offset % SchedPeriod) == 0; @@ -930,7 +930,7 @@ private: std::string result; result += '('; - for (size_t i = 0; i < shift_amounts.size(); ++i) { + for (std::size_t i = 0; i < shift_amounts.size(); ++i) { if (i) result += '|'; result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + @@ -956,7 +956,7 @@ private: ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented"); - size_t written_components = 0; + std::size_t written_components = 0; for (u32 component = 0; component < 4; ++component) { if (!instr.texs.IsComponentEnabled(component)) { continue; @@ -1894,8 +1894,8 @@ private: UNREACHABLE(); } } - size_t dest_elem{}; - for (size_t elem = 0; elem < 4; ++elem) { + std::size_t dest_elem{}; + for (std::size_t elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { // Skip disabled components continue; @@ -1999,8 +1999,8 @@ private: const std::string texture = "textureGather(" + sampler + ", coords, " + std::to_string(instr.tld4.component) + ')'; - size_t dest_elem{}; - for (size_t elem = 0; elem < 4; ++elem) { + std::size_t dest_elem{}; + for (std::size_t elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { // Skip disabled components continue; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index a43e2997b..d53b93ad5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -13,7 +13,7 @@ namespace OpenGL::GLShader { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; +constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; using ProgramCode = std::vector; class ConstBufferEntry { @@ -51,7 +51,7 @@ public: } std::string GetName() const { - return BufferBaseNames[static_cast(stage)] + std::to_string(index); + return BufferBaseNames[static_cast(stage)] + std::to_string(index); } u32 GetHash() const { @@ -74,15 +74,15 @@ class SamplerEntry { using Maxwell = Tegra::Engines::Maxwell3D::Regs; public: - SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, + SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array) : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} - size_t GetOffset() const { + std::size_t GetOffset() const { return offset; } - size_t GetIndex() const { + std::size_t GetIndex() const { return sampler_index; } @@ -91,7 +91,7 @@ public: } std::string GetName() const { - return std::string(TextureSamplerNames[static_cast(stage)]) + '_' + + return std::string(TextureSamplerNames[static_cast(stage)]) + '_' + std::to_string(sampler_index); } @@ -133,7 +133,7 @@ public: } static std::string GetArrayName(Maxwell::ShaderStage stage) { - return TextureSamplerNames[static_cast(stage)]; + return TextureSamplerNames[static_cast(stage)]; } private: @@ -143,9 +143,9 @@ private: /// Offset in TSC memory from which to read the sampler object, as specified by the sampling /// instruction. - size_t offset; + std::size_t offset; Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. - size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. + std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) bool is_array; ///< Whether the texture is being sampled as an array texture or not. }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 533e42caa..b86cd96e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,7 +12,7 @@ namespace OpenGL::GLShader { /// Number of OpenGL texture samplers that can be used in the fragment shader -static constexpr size_t NumTextureSamplers = 32; +static constexpr std::size_t NumTextureSamplers = 32; using Tegra::Engines::Maxwell3D; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 6f70deb96..af99132ba 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -272,7 +272,7 @@ void OpenGLState::Apply() const { } // Clip distance - for (size_t i = 0; i < clip_distance.size(); ++i) { + for (std::size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { if (clip_distance[i]) { glEnable(GL_CLIP_DISTANCE0 + static_cast(i)); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index aadf68f16..664f3ca20 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -61,7 +61,7 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a mapped_size = size; if (alignment > 0) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); + buffer_pos = Common::AlignUp(buffer_pos, alignment); } bool invalidate = false; -- cgit v1.2.3 From 0019a36b41dae2afb23825ed88f90147d0633d37 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 15 Sep 2018 23:48:02 +0800 Subject: Implement RenderTargetFormat::BGR5A1_UNORM (Pokken Tournament DX) --- src/video_core/gpu.h | 1 + src/video_core/renderer_opengl/gl_rasterizer_cache.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 589a59b4f..ec83aa1a3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 { R32_UINT = 0xE4, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, + BGR5A1_UNORM = 0xE9, RG8_UNORM = 0xEA, RG8_SNORM = 0xEB, R16_UNORM = 0xEE, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 57ea8593b..ef1a15888 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -316,6 +316,8 @@ struct SurfaceParams { return PixelFormat::R11FG11FB10F; case Tegra::RenderTargetFormat::B5G6R5_UNORM: return PixelFormat::B5G6R5U; + case Tegra::RenderTargetFormat::BGR5A1_UNORM: + return PixelFormat::A1B5G5R5U; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: @@ -576,6 +578,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RG16_UNORM: case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: + case Tegra::RenderTargetFormat::BGR5A1_UNORM: case Tegra::RenderTargetFormat::RG8_UNORM: case Tegra::RenderTargetFormat::RGBA16_UNORM: return ComponentType::UNorm; -- cgit v1.2.3 From 2845348608ccccd0ce36a351cc4a35e753d84837 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sun, 16 Sep 2018 22:47:02 +0800 Subject: Implement ASTC_2D_8X8 (Bayonetta 2) --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 9 ++++++++- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 15 ++++++++++----- src/video_core/textures/decoders.cpp | 2 ++ 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 32001e44b..63bbcb666 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -167,6 +167,7 @@ static constexpr std::array tex_form {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 // Depth formats {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F @@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType static bool IsPixelFormatASTC(PixelFormat format) { switch (format) { case PixelFormat::ASTC_2D_4X4: + case PixelFormat::ASTC_2D_8X8: return true; default: return false; @@ -223,6 +225,8 @@ static std::pair GetASTCBlockSize(PixelFormat format) { switch (format) { case PixelFormat::ASTC_2D_4X4: return {4, 4}; + case PixelFormat::ASTC_2D_8X8: + return {8, 8}; default: LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast(format)); UNREACHABLE(); @@ -327,6 +331,7 @@ static constexpr std::array, MortonCopy, MortonCopy, + MortonCopy, MortonCopy, MortonCopy, MortonCopy, @@ -386,6 +391,7 @@ static constexpr std::array, MortonCopy, MortonCopy, + nullptr, MortonCopy, MortonCopy, MortonCopy, @@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector& data, u32 width, u32 height) { static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector& data, PixelFormat pixel_format, u32 width, u32 height) { switch (pixel_format) { - case PixelFormat::ASTC_2D_4X4: { + case PixelFormat::ASTC_2D_4X4: + case PixelFormat::ASTC_2D_8X8: { // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. u32 block_width{}; u32 block_height{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 57ea8593b..0525e9a94 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -70,19 +70,20 @@ struct SurfaceParams { RG8S = 42, RG32UI = 43, R32UI = 44, + ASTC_2D_8X8 = 45, MaxColorFormat, // Depth formats - Z32F = 45, - Z16 = 46, + Z32F = 46, + Z16 = 47, MaxDepthFormat, // DepthStencil formats - Z24S8 = 47, - S8Z24 = 48, - Z32FS8 = 49, + Z24S8 = 48, + S8Z24 = 49, + Z32FS8 = 50, MaxDepthStencilFormat, @@ -192,6 +193,7 @@ struct SurfaceParams { 1, // RG8S 1, // RG32UI 1, // R32UI + 4, // ASTC_2D_8X8 1, // Z32F 1, // Z16 1, // Z24S8 @@ -253,6 +255,7 @@ struct SurfaceParams { 16, // RG8S 64, // RG32UI 32, // R32UI + 16, // ASTC_2D_8X8 32, // Z32F 16, // Z16 32, // Z24S8 @@ -522,6 +525,8 @@ struct SurfaceParams { return PixelFormat::BC6H_SF16; case Tegra::Texture::TextureFormat::ASTC_2D_4X4: return PixelFormat::ASTC_2D_4X4; + case Tegra::Texture::TextureFormat::ASTC_2D_8X8: + return PixelFormat::ASTC_2D_8X8; case Tegra::Texture::TextureFormat::R16_G16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 272294c62..58b65de1a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -63,6 +63,7 @@ u32 BytesPerPixel(TextureFormat format) { case TextureFormat::R32_G32_B32: return 12; case TextureFormat::ASTC_2D_4X4: + case TextureFormat::ASTC_2D_8X8: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::BF10GF11RF11: @@ -111,6 +112,7 @@ std::vector DecodeTexture(const std::vector& texture_data, TextureFormat case TextureFormat::BC6H_UF16: case TextureFormat::BC6H_SF16: case TextureFormat::ASTC_2D_4X4: + case TextureFormat::ASTC_2D_8X8: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A1B5G5R5: -- cgit v1.2.3 From 55a47567666efedb5eeb127ce74e8f77954f2292 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 12 Sep 2018 16:17:18 -0400 Subject: Added texture misc modes to texture instructions --- src/video_core/engines/shader_bytecode.h | 148 ++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6e555ea03..88b4d0bac 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -271,6 +271,15 @@ enum class TextureProcessMode : u64 { LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL }; +enum class TextureMiscMode : u64 { + DC, + AOFFI, // Uses Offset + NDV, + NODEP, + MZ, + PTP, +}; + enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; @@ -590,42 +599,127 @@ union Instruction { BitField<28, 1, u64> array; BitField<29, 2, TextureType> texture_type; BitField<31, 4, u64> component_mask; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<54, 1, u64> aoffi_flag; BitField<55, 3, TextureProcessMode> process_mode; bool IsComponentEnabled(std::size_t component) const { return ((1ull << component) & component_mask) != 0; } + + TextureProcessMode GetTextureProcessMode() const { + return process_mode; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::AOFFI: + return aoffi_flag != 0; + default: + break; + } + return false; + } } tex; union { BitField<22, 6, TextureQueryType> query_type; BitField<31, 4, u64> component_mask; + BitField<49, 1, u64> nodep_flag; + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::NODEP: + return nodep_flag != 0; + default: + break; + } + return false; + } } txq; union { BitField<28, 1, u64> array; BitField<29, 2, TextureType> texture_type; BitField<31, 4, u64> component_mask; + BitField<35, 1, u64> ndv_flag; + BitField<49, 1, u64> nodep_flag; bool IsComponentEnabled(std::size_t component) const { return ((1ull << component) & component_mask) != 0; } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::NDV: + return (ndv_flag != 0); + case TextureMiscMode::NODEP: + return (nodep_flag != 0); + default: + break; + } + return false; + } } tmml; union { BitField<28, 1, u64> array; BitField<29, 2, TextureType> texture_type; + BitField<35, 1, u64> ndv_flag; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<54, 2, u64> info; BitField<56, 2, u64> component; + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::NDV: + return ndv_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::AOFFI: + return info == 1; + case TextureMiscMode::PTP: + return info == 2; + default: + break; + } + return false; + } } tld4; union { + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<51, 1, u64> aoffi_flag; BitField<52, 2, u64> component; + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::AOFFI: + return aoffi_flag != 0; + default: + break; + } + return false; + } } tld4s; union { BitField<0, 8, Register> gpr0; BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep; + BitField<49, 1, u64> nodep_flag; BitField<50, 3, u64> component_mask_selector; BitField<53, 4, u64> texture_info; @@ -645,6 +739,37 @@ union Instruction { UNREACHABLE(); } + TextureProcessMode GetTextureProcessMode() const { + switch (texture_info) { + case 0: + case 2: + case 6: + case 8: + case 9: + case 11: + return TextureProcessMode::LZ; + case 3: + case 5: + case 13: + return TextureProcessMode::LL; + default: + break; + } + return TextureProcessMode::None; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + default: + break; + } + return false; + } + bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. return texture_info >= 7 && texture_info <= 9; @@ -673,6 +798,7 @@ union Instruction { } texs; union { + BitField<49, 1, u64> nodep_flag; BitField<53, 4, u64> texture_info; TextureType GetTextureType() const { @@ -693,6 +819,26 @@ union Instruction { UNREACHABLE(); } + TextureProcessMode GetTextureProcessMode() const { + if (texture_info == 1 || texture_info == 5 || texture_info == 12) + return TextureProcessMode::LL; + return TextureProcessMode::LZ; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::AOFFI: + return texture_info == 12 || texture_info == 4; + case TextureMiscMode::MZ: + return texture_info == 5; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + default: + break; + } + return false; + } + bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. return texture_info == 8; -- cgit v1.2.3 From 31e52113b376139183c14a3b7f46666b19951b7f Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 12 Sep 2018 16:17:44 -0400 Subject: Added asserts for texture misc modes to texture instructions --- .../renderer_opengl/gl_shader_decompiler.cpp | 47 +++++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 252ff18fc..60d0ec81f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -954,8 +954,6 @@ private: // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented"); - std::size_t written_components = 0; for (u32 component = 0; component < 4; ++component) { if (!instr.texs.IsComponentEnabled(component)) { @@ -1871,6 +1869,13 @@ private: Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; std::string coord; + ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), + "DC is not implemented"); + switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); @@ -1953,6 +1958,11 @@ private: Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; bool is_array{instr.texs.IsArrayTexture()}; + ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), + "DC is not implemented"); + switch (texture_type) { case Tegra::Shader::TextureType::Texture2D: { if (is_array) { @@ -1989,6 +1999,13 @@ private: ASSERT(instr.tlds.IsArrayTexture() == false); std::string coord; + ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), + "MZ is not implemented"); + switch (instr.tlds.GetTextureType()) { case Tegra::Shader::TextureType::Texture2D: { if (instr.tlds.IsArrayTexture()) { @@ -2017,6 +2034,17 @@ private: ASSERT(instr.tld4.array == 0); std::string coord; + ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), + "DC is not implemented"); + ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), + "NDV is not implemented"); + ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), + "PTP is not implemented"); + switch (instr.tld4.texture_type) { case Tegra::Shader::TextureType::Texture2D: { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); @@ -2054,6 +2082,13 @@ private: break; } case OpCode::Id::TLD4S: { + ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), + "DC is not implemented"); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. @@ -2066,6 +2101,9 @@ private: break; } case OpCode::Id::TXQ: { + ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + // TODO: the new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. @@ -2086,6 +2124,11 @@ private: break; } case OpCode::Id::TMML: { + ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), + "NODEP is not implemented"); + ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), + "NDV is not implemented"); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const bool is_array = instr.tmml.array != 0; -- cgit v1.2.3 From aac77bbd18f2b943b7259d8099241c4ec697b8f4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 8 Sep 2018 21:54:11 -0400 Subject: Implemented Control Codes --- src/video_core/engines/shader_bytecode.h | 36 ++++++++++++++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 15 +++++++++ 2 files changed, 51 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6e555ea03..12229cf4c 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -240,6 +240,41 @@ enum class FlowCondition : u64 { Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? }; +enum class ControlCode : u64 { + F = 0, + LT = 1, + EQ = 2, + LE = 3, + GT = 4, + NE = 5, + GE = 6, + Num = 7, + Nan = 8, + LTU = 9, + EQU = 10, + LEU = 11, + GTU = 12, + NEU = 13, + GEU = 14, + // + OFF = 16, + LO = 17, + SFF = 18, + LS = 19, + HI = 20, + SFT = 21, + HS = 22, + OFT = 23, + CSM_TA = 24, + CSM_TR = 25, + CSM_MX = 26, + FCSM_TA = 27, + FCSM_TR = 28, + FCSM_MX = 29, + RLE = 30, + RGT = 31, +}; + enum class PredicateResultMode : u64 { None = 0x0, NotZero = 0x3, @@ -735,6 +770,7 @@ union Instruction { BitField<36, 5, u64> index; } cbuf36; + BitField<47, 1, u64> generates_cc; BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 252ff18fc..34f9e57d4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -351,6 +351,15 @@ public: shader.AddLine(dest + " = " + src + ';'); } + std::string GetControlCode(const Tegra::Shader::ControlCode cc) { + u32 code = static_cast(cc); + return "controlCode_" + std::to_string(code); + } + + void SetControlCode(const Tegra::Shader::ControlCode cc, const std::string& value) { + shader.AddLine(GetControlCode(cc) + " = " + value + ';'); + } + /** * Writes code that does a output attribute assignment to register operation. Output attributes * are stored as floats, so this may require conversion. @@ -414,6 +423,12 @@ public: } declarations.AddNewLine(); + for (u32 cc = 0; cc < 32; cc++) { + Tegra::Shader::ControlCode code = static_cast(cc); + declarations.AddLine("bool " + GetControlCode(code) + " = false;"); + } + declarations.AddNewLine(); + for (const auto element : declr_input_attribute) { // TODO(bunnei): Use proper number of elements for these u32 idx = -- cgit v1.2.3 From e2ac8fb36d0f5540d87148226c4e93de1b6de5d3 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 8 Sep 2018 22:44:20 -0400 Subject: Implemented CSETP --- src/video_core/engines/shader_bytecode.h | 11 +++++ .../renderer_opengl/gl_shader_decompiler.cpp | 52 ++++++++++++++++------ 2 files changed, 49 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 12229cf4c..3c5e9ea96 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -580,6 +580,15 @@ union Instruction { BitField<45, 2, PredOperation> op; } pset; + union { + BitField<0, 3, u64> pred0; + BitField<3, 3, u64> pred3; + BitField<8, 5, ControlCode> cc; // flag in cc + BitField<39, 3, u64> pred39; + BitField<42, 1, u64> neg_pred39; + BitField<45, 4, PredOperation> op; // op with pred39 + } csetp; + union { BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; @@ -895,6 +904,7 @@ public: ISET_IMM, PSETP, PSET, + CSETP, XMAD_IMM, XMAD_CR, XMAD_RC, @@ -1131,6 +1141,7 @@ private: INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), + INST("010100001010----", Id::PSETP, Type::PredicateSetPredicate, "CSETP"), INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 34f9e57d4..158eb2190 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2267,13 +2267,15 @@ private: break; } case OpCode::Type::PredicateSetPredicate: { - const std::string op_a = - GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const std::string op_b = - GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); + switch (opcode->GetId()) { + case OpCode::Id::PSETP: { + const std::string op_a = + GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); + const std::string op_b = + GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); + // We can't use the constant predicate as destination. + ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); const std::string second_pred = GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); @@ -2283,15 +2285,37 @@ private: const std::string predicate = '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(instr.psetp.pred3, - '(' + predicate + ") " + combiner + " (" + second_pred + ')'); + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(instr.psetp.pred3, + '(' + predicate + ") " + combiner + " (" + second_pred + ')'); - if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - SetPredicate(instr.psetp.pred0, - "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); + if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled + SetPredicate(instr.psetp.pred0, + "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); + } + break; + } + case OpCode::Id::CSETP: { + std::string pred = + GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); + std::string combiner = GetPredicateCombiner(instr.csetp.op); + std::string controlCode = regs.GetControlCode(instr.csetp.cc); + if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { + SetPredicate(instr.csetp.pred3, + '(' + controlCode + ") " + combiner + " (" + pred + ')'); + } + if (instr.csetp.pred0 != static_cast(Pred::UnusedIndex)) { + SetPredicate(instr.csetp.pred0, + "!(" + controlCode + ") " + combiner + " (" + pred + ')'); + } + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName()); + UNREACHABLE(); + } } break; } -- cgit v1.2.3 From e4bb759c4bfc6916a97c3358085a9394fad5e4dc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 9 Sep 2018 00:46:19 -0400 Subject: Implemented I2I.CC on the NEU control code, used by SMO --- src/video_core/engines/shader_bytecode.h | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 30 ++++++++++++---------- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 3c5e9ea96..b25168600 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1141,7 +1141,7 @@ private: INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::PSETP, Type::PredicateSetPredicate, "CSETP"), + INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 158eb2190..2f1d6de3c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -351,12 +351,12 @@ public: shader.AddLine(dest + " = " + src + ';'); } - std::string GetControlCode(const Tegra::Shader::ControlCode cc) { - u32 code = static_cast(cc); - return "controlCode_" + std::to_string(code); + std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { + const u32 code = static_cast(cc); + return "controlCode_" + std::to_string(code) + suffix; } - void SetControlCode(const Tegra::Shader::ControlCode cc, const std::string& value) { + void SetControlCode(const Tegra::Shader::ControlCode cc, const std::string& value) const { shader.AddLine(GetControlCode(cc) + " = " + value + ';'); } @@ -424,7 +424,7 @@ public: declarations.AddNewLine(); for (u32 cc = 0; cc < 32; cc++) { - Tegra::Shader::ControlCode code = static_cast(cc); + const Tegra::Shader::ControlCode code = static_cast(cc); declarations.AddLine("bool " + GetControlCode(code) + " = false;"); } declarations.AddNewLine(); @@ -1656,6 +1656,10 @@ private: regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); + if (instr.generates_cc.Value() != 0) { + const std::string neucondition = "( " + op_a + " != 0 )"; + regs.SetControlCode(Tegra::Shader::ControlCode::NEU, neucondition); + } break; } case OpCode::Id::I2F_R: @@ -2277,13 +2281,13 @@ private: // We can't use the constant predicate as destination. ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); - const std::string second_pred = - GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); + const std::string second_pred = + GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - const std::string combiner = GetPredicateCombiner(instr.psetp.op); + const std::string combiner = GetPredicateCombiner(instr.psetp.op); - const std::string predicate = - '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; + const std::string predicate = + '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.psetp.pred3, @@ -2298,10 +2302,10 @@ private: break; } case OpCode::Id::CSETP: { - std::string pred = + const std::string pred = GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - std::string combiner = GetPredicateCombiner(instr.csetp.op); - std::string controlCode = regs.GetControlCode(instr.csetp.cc); + const std::string combiner = GetPredicateCombiner(instr.csetp.op); + const std::string controlCode = regs.GetControlCode(instr.csetp.cc); if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { SetPredicate(instr.csetp.pred3, '(' + controlCode + ") " + combiner + " (" + pred + ')'); -- cgit v1.2.3 From 9a8dbba1e59fb9d8cbd2c394802b18eb7dc9330d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 17 Sep 2018 19:25:53 -0400 Subject: gl_shader_decompiler: Avoid truncation warnings within LD_A and ST_A code These are internally stored as u64 values, so using u32 here causes truncation warnings. Instead, we can just use u64 and preserve the bit width. --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 252ff18fc..4e7d4a24e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1779,8 +1779,8 @@ private: Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, Tegra::Shader::IpaSampleMode::Default}; - u32 next_element = instr.attribute.fmt20.element; - u32 next_index = static_cast(instr.attribute.fmt20.index.Value()); + u64 next_element = instr.attribute.fmt20.element; + u64 next_index = static_cast(instr.attribute.fmt20.index.Value()); const auto LoadNextElement = [&](u32 reg_offset) { regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, @@ -1844,8 +1844,8 @@ private: ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, "Unaligned attribute loads are not supported"); - u32 next_element = instr.attribute.fmt20.element; - u32 next_index = static_cast(instr.attribute.fmt20.index.Value()); + u64 next_element = instr.attribute.fmt20.element; + u64 next_index = static_cast(instr.attribute.fmt20.index.Value()); const auto StoreNextElement = [&](u32 reg_offset) { regs.SetOutputAttributeToRegister(static_cast(next_index), -- cgit v1.2.3 From 567a5524b9b5d77153f580bf1b548430d368e2f2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 17 Sep 2018 18:16:06 -0400 Subject: Implemented Internal Flags --- .../renderer_opengl/gl_shader_decompiler.cpp | 48 ++++++++++++++++------ 1 file changed, 35 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2f1d6de3c..f5425c31e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -235,6 +235,14 @@ private: const std::string& suffix; }; +enum class InternalFlag : u64 { + ZeroFlag = 0, + CarryFlag = 1, + OverflowFlag = 2, + NaNFlag = 3, + Amount +}; + /** * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state * of all registers (e.g. whether they are currently being used as Floats or Integers), and @@ -328,13 +336,19 @@ public: void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, bool is_saturated = false, - u64 dest_elem = 0, Register::Size size = Register::Size::Word) { + u64 dest_elem = 0, Register::Size size = Register::Size::Word, + bool sets_cc = false) { ASSERT_MSG(!is_saturated, "Unimplemented"); const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', dest_num_components, value_num_components, dest_elem); + + if (sets_cc) { + const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; + SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); + } } /** @@ -352,12 +366,23 @@ public: } std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { - const u32 code = static_cast(cc); - return "controlCode_" + std::to_string(code) + suffix; + switch (cc) { + case Tegra::Shader::ControlCode::NEU: + return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast(cc)); + UNREACHABLE(); + return "false"; + } } - void SetControlCode(const Tegra::Shader::ControlCode cc, const std::string& value) const { - shader.AddLine(GetControlCode(cc) + " = " + value + ';'); + std::string GetInternalFlag(const InternalFlag ii) const { + const u32 code = static_cast(ii); + return "internalFlag_" + std::to_string(code) + suffix; + } + + void SetInternalFlag(const InternalFlag ii, const std::string& value) const { + shader.AddLine(GetInternalFlag(ii) + " = " + value + ';'); } /** @@ -423,9 +448,9 @@ public: } declarations.AddNewLine(); - for (u32 cc = 0; cc < 32; cc++) { - const Tegra::Shader::ControlCode code = static_cast(cc); - declarations.AddLine("bool " + GetControlCode(code) + " = false;"); + for (u32 ii = 0; ii < static_cast(InternalFlag::Amount); ii++) { + const InternalFlag code = static_cast(ii); + declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); } declarations.AddNewLine(); @@ -1655,11 +1680,8 @@ private: } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); - if (instr.generates_cc.Value() != 0) { - const std::string neucondition = "( " + op_a + " != 0 )"; - regs.SetControlCode(Tegra::Shader::ControlCode::NEU, neucondition); - } + 1, instr.alu.saturate_d, 0, instr.conversion.dest_size, + instr.generates_cc.Value() != 0); break; } case OpCode::Id::I2F_R: -- cgit v1.2.3