diff options
| author | bunnei <bunneidev@gmail.com> | 2018-09-17 18:53:14 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-09-17 18:53:14 -0400 | 
| commit | fafc80d72ec56b7e6f9b66e3f26a302bcf2396ac (patch) | |
| tree | f68d54782cccc42986326a5091be60047f4d1185 | |
| parent | a94b623dfb6f151dfa9c0a62ad30368a9c8058c2 (diff) | |
| parent | 2b48cfd44b9923d887314ca2ce8ad09240a997b2 (diff) | |
Merge pull request #1290 from FernandoS27/shader-header
Implemented (Partially) Shader Header
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_header.h | 103 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 31 | 
3 files changed, 111 insertions, 24 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4a79ce39c..f5ae57039 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(video_core STATIC      engines/maxwell_dma.cpp      engines/maxwell_dma.h      engines/shader_bytecode.h +    engines/shader_header.h      gpu.cpp      gpu.h      macro_interpreter.cpp diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h new file mode 100644 index 000000000..a885ee3cf --- /dev/null +++ b/src/video_core/engines/shader_header.h @@ -0,0 +1,103 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra::Shader { + +enum class OutputTopology : u32 { +    PointList = 1, +    LineStrip = 6, +    TriangleStrip = 7, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture +struct Header { +    union { +        BitField<0, 5, u32> sph_type; +        BitField<5, 5, u32> version; +        BitField<10, 4, u32> shader_type; +        BitField<14, 1, u32> mrt_enable; +        BitField<15, 1, u32> kills_pixels; +        BitField<16, 1, u32> does_global_store; +        BitField<17, 4, u32> sass_version; +        BitField<21, 5, u32> reserved; +        BitField<26, 1, u32> does_load_or_store; +        BitField<27, 1, u32> does_fp64; +        BitField<28, 4, u32> stream_out_mask; +    } common0; + +    union { +        BitField<0, 24, u32> shader_local_memory_low_size; +        BitField<24, 8, u32> per_patch_attribute_count; +    } common1; + +    union { +        BitField<0, 24, u32> shader_local_memory_high_size; +        BitField<24, 8, u32> threads_per_input_primitive; +    } common2; + +    union { +        BitField<0, 24, u32> 
shader_local_memory_crs_size; +        BitField<24, 4, OutputTopology> output_topology; +        BitField<28, 4, u32> reserved; +    } common3; + +    union { +        BitField<0, 12, u32> max_output_vertices; +        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. +        BitField<20, 4, u32> reserved; +        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. +    } common4; + +    union { +        struct { +            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA +            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB +            INSERT_PADDING_BYTES(16); // ImapGenericVector[32] +            INSERT_PADDING_BYTES(2);  // ImapColor +            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC +            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10] +            INSERT_PADDING_BYTES(1);  // ImapReserved +            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA +            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB +            INSERT_PADDING_BYTES(16); // OmapGenericVector[32] +            INSERT_PADDING_BYTES(2);  // OmapColor +            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC +            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10] +            INSERT_PADDING_BYTES(1);  // OmapReserved +        } vtg; + +        struct { +            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA +            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB +            INSERT_PADDING_BYTES(32); // ImapGenericVector[32] +            INSERT_PADDING_BYTES(2);  // ImapColor +            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC +            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] +            INSERT_PADDING_BYTES(2);  // ImapReserved +            struct { +                u32 target; +                union { +                    BitField<0, 1, u32> sample_mask; +                    BitField<1, 1, u32> depth; +                    BitField<2, 30, u32> reserved; +             
   }; +            } omap; +            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { +                const u32 bit = render_target * 4 + component; +                return omap.target & (1u << bit); +            } +        } ps; +    }; +}; + +static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); + +} // namespace Tegra::Shader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 252ff18fc..a1638c12e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -12,6 +12,7 @@  #include "common/assert.h"  #include "common/common_types.h"  #include "video_core/engines/shader_bytecode.h" +#include "video_core/engines/shader_header.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;  using Tegra::Shader::SubOp;  constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -constexpr u32 PROGRAM_HEADER_SIZE = 0x50; +constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);  class DecompileFail : public std::runtime_error {  public: @@ -674,7 +675,7 @@ public:                    u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)          : subroutines(subroutines), program_code(program_code), main_offset(main_offset),            stage(stage), suffix(suffix) { - +        std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));          Generate(suffix);      } @@ -688,23 +689,6 @@ public:      }  private: -    // Shader program header for a Fragment Shader. 
-    struct FragmentHeader { -        INSERT_PADDING_WORDS(5); -        INSERT_PADDING_WORDS(13); -        u32 enabled_color_outputs; -        union { -            BitField<0, 1, u32> writes_samplemask; -            BitField<1, 1, u32> writes_depth; -        }; - -        bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { -            const u32 bit = render_target * 4 + component; -            return enabled_color_outputs & (1 << bit); -        } -    }; -    static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); -      /// Gets the Subroutine object corresponding to the specified address.      const Subroutine& GetSubroutine(u32 begin, u32 end) const {          const auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -1010,10 +994,8 @@ private:      /// Writes the output values from a fragment shader to the corresponding GLSL output variables.      void EmitFragmentOutputsWrite() {          ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); -        FragmentHeader header; -        std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); -        ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); +        ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");          // Write the color outputs using the data in the shader registers, disabled          // rendertargets/components are skipped in the register assignment. @@ -1022,7 +1004,7 @@ private:               ++render_target) {              // TODO(Subv): Figure out how dual-source blending is configured in the Switch.              
for (u32 component = 0; component < 4; ++component) { -                if (header.IsColorComponentOutputEnabled(render_target, component)) { +                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {                      shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,                                                 regs.GetRegisterAsFloat(current_reg)));                      ++current_reg; @@ -1030,7 +1012,7 @@ private:              }          } -        if (header.writes_depth) { +        if (header.ps.omap.depth) {              // The depth output is always 2 registers after the last color output, and current_reg              // already contains one past the last color register. @@ -2666,6 +2648,7 @@ private:  private:      const std::set<Subroutine>& subroutines;      const ProgramCode& program_code; +    Tegra::Shader::Header header;      const u32 main_offset;      Maxwell3D::Regs::ShaderStage stage;      const std::string& suffix;  | 
