diff options
| author | Yuri Kunde Schlesner <yuriks@yuriks.net> | 2017-02-04 13:02:48 -0800 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-02-04 13:02:48 -0800 | 
| commit | 97e06b0a0daccd3347ae1bcaf294093b5af32e85 (patch) | |
| tree | 59e1997c90558f58f7368d6974c355e1f20d8f32 /src/video_core | |
| parent | 18c981b99606b40897d8bc2da218e34509873246 (diff) | |
| parent | 37a4ea046d80973d59ddb7735a0ffbf0bfd93ad0 (diff) | |
Merge pull request #2476 from yuriks/shader-refactor3
Oh No! More shader changes!
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/clipper.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 33 | ||||
| -rw-r--r-- | src/video_core/pica.h | 57 | ||||
| -rw-r--r-- | src/video_core/pica_state.h | 4 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.h | 5 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/rasterizer.h | 40 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 63 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 62 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 5 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.h | 4 | 
16 files changed, 160 insertions, 161 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 05b5cea73..0774ffc53 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -18,6 +18,8 @@  #include "video_core/rasterizer.h"  #include "video_core/shader/shader.h" +using Pica::Rasterizer::Vertex; +  namespace Pica {  namespace Clipper { @@ -29,20 +31,20 @@ public:                                                   float24::FromFloat32(0), float24::FromFloat32(0)))          : coeffs(coeffs), bias(bias) {} -    bool IsInside(const OutputVertex& vertex) const { +    bool IsInside(const Vertex& vertex) const {          return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);      } -    bool IsOutSide(const OutputVertex& vertex) const { +    bool IsOutSide(const Vertex& vertex) const {          return !IsInside(vertex);      } -    OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { +    Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const {          float24 dp = Math::Dot(v0.pos + bias, coeffs);          float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);          float24 factor = dp_prev / (dp_prev - dp); -        return OutputVertex::Lerp(factor, v0, v1); +        return Vertex::Lerp(factor, v0, v1);      }  private: @@ -51,7 +53,7 @@ private:      Math::Vec4<float24> bias;  }; -static void InitScreenCoordinates(OutputVertex& vtx) { +static void InitScreenCoordinates(Vertex& vtx) {      struct {          float24 halfsize_x;          float24 offset_x; @@ -91,8 +93,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu      // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a      // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.      static const size_t MAX_VERTICES = 9; -    static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; -    static_vector<OutputVertex, MAX_VERTICES> buffer_b; +    static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; +    static_vector<Vertex, MAX_VERTICES> buffer_b;      auto* output_list = &buffer_a;      auto* input_list = &buffer_b; @@ -123,7 +125,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu          std::swap(input_list, output_list);          output_list->clear(); -        const OutputVertex* reference_vertex = &input_list->back(); +        const Vertex* reference_vertex = &input_list->back();          for (const auto& vertex : *input_list) {              // NOTE: This algorithm changes vertex order in some cases! @@ -148,9 +150,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu      InitScreenCoordinates((*output_list)[1]);      for (size_t i = 0; i < output_list->size() - 2; i++) { -        OutputVertex& vtx0 = (*output_list)[0]; -        OutputVertex& vtx1 = (*output_list)[i + 1]; -        OutputVertex& vtx2 = (*output_list)[i + 2]; +        Vertex& vtx0 = (*output_list)[0]; +        Vertex& vtx1 = (*output_list)[i + 1]; +        Vertex& vtx2 = (*output_list)[i + 2];          InitScreenCoordinates(vtx2); diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index eb79974a8..4955ff9f9 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -125,20 +125,21 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {              // TODO: Verify that this actually modifies the register!              if (setup.index < 15) { -                g_state.vs_default_attributes[setup.index] = attribute; +                g_state.input_default_attributes.attr[setup.index] = attribute;                  setup.index++;              } else { -                // Put each attribute into an immediate input buffer. -                // When all specified immediate attributes are present, the Vertex Shader is invoked -                // and everything is -                // sent to the primitive assembler. +                // Put each attribute into an immediate input buffer.  When all specified immediate +                // attributes are present, the Vertex Shader is invoked and everything is sent to +                // the primitive assembler.                  auto& immediate_input = g_state.immediate.input_vertex;                  auto& immediate_attribute_id = g_state.immediate.current_attribute; -                immediate_input.attr[immediate_attribute_id++] = attribute; +                immediate_input.attr[immediate_attribute_id] = attribute; -                if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { +                if (immediate_attribute_id < regs.max_input_attrib_index) { +                    immediate_attribute_id += 1; +                } else {                      MICROPROFILE_SCOPE(GPU_Drawing);                      immediate_attribute_id = 0; @@ -150,10 +151,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {                          g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,                                                   static_cast<void*>(&immediate_input));                      Shader::UnitState shader_unit; -                    shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); +                    Shader::AttributeBuffer output{}; + +                    shader_unit.LoadInput(regs.vs, immediate_input);                      shader_engine->Run(g_state.vs, shader_unit); -                    auto output_vertex = Shader::OutputVertex::FromRegisters( -                        shader_unit.registers.output, regs, regs.vs.output_mask); +                    shader_unit.WriteOutput(regs.vs, output);                      // Send to renderer                      using Pica::Shader::OutputVertex; @@ -162,7 +164,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {                          VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);                      }; -                    g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); +                    g_state.primitive_assembler.SubmitVertex( +                        Shader::OutputVertex::FromAttributeBuffer(regs, output), AddTriangle);                  }              }          } @@ -280,19 +283,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {              if (!vertex_cache_hit) {                  // Initialize data for the current vertex -                Shader::InputVertex input; +                Shader::AttributeBuffer input, output{};                  loader.LoadVertex(base_address, index, vertex, input, memory_accesses);                  // Send to vertex shader                  if (g_debug_context)                      g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,                                               (void*)&input); -                shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); +                shader_unit.LoadInput(regs.vs, input);                  shader_engine->Run(g_state.vs, shader_unit); +                shader_unit.WriteOutput(regs.vs, output);                  // Retrieve vertex from register data -                output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, -                                                                    regs, regs.vs.output_mask); +                output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs, output);                  if (is_indexed) {                      vertex_cache[vertex_cache_pos] = output_vertex; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 4ab4f1f40..731540b99 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -99,7 +99,8 @@ struct Regs {              TEXCOORD1_U = 14,              TEXCOORD1_V = 15, -            // TODO: Not verified +            TEXCOORD0_W = 16, +              VIEW_X = 18,              VIEW_Y = 19,              VIEW_Z = 20, @@ -871,7 +872,7 @@ struct Regs {          LightSrc light[8];          LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)          INSERT_PADDING_WORDS(0x1); -        BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 +        BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1          union {              BitField<2, 2, LightingFresnelSelector> fresnel_selector; @@ -1048,7 +1049,7 @@ struct Regs {              BitField<48, 12, u64> attribute_mask;              // number of total attributes minus 1 -            BitField<60, 4, u64> num_extra_attributes; +            BitField<60, 4, u64> max_attribute_index;          };          inline VertexAttributeFormat GetFormat(int n) const { @@ -1079,7 +1080,7 @@ struct Regs {          }          inline int GetNumTotalAttributes() const { -            return (int)num_extra_attributes + 1; +            return (int)max_attribute_index + 1;          }          // Attribute loaders map the source vertex data to input attributes @@ -1179,7 +1180,12 @@ struct Regs {          }      } command_buffer; -    INSERT_PADDING_WORDS(0x07); +    INSERT_PADDING_WORDS(4); + +    /// Number of input attributes to the vertex shader minus 1 +    BitField<0, 4, u32> max_input_attrib_index; + +    INSERT_PADDING_WORDS(2);      enum class GPUMode : u32 {          Drawing = 0, @@ -1217,42 +1223,21 @@ struct Regs {          union {              // Number of input attributes to shader unit - 1 -            BitField<0, 4, u32> num_input_attributes; +            BitField<0, 4, u32> max_input_attribute_index;          };          // Offset to shader program entry point (in words)          BitField<0, 16, u32> main_offset; -        union { -            BitField<0, 4, u64> attribute0_register; -            BitField<4, 4, u64> attribute1_register; -            BitField<8, 4, u64> attribute2_register; -            BitField<12, 4, u64> attribute3_register; -            BitField<16, 4, u64> attribute4_register; -            BitField<20, 4, u64> attribute5_register; -            BitField<24, 4, u64> attribute6_register; -            BitField<28, 4, u64> attribute7_register; -            BitField<32, 4, u64> attribute8_register; -            BitField<36, 4, u64> attribute9_register; -            BitField<40, 4, u64> attribute10_register; -            BitField<44, 4, u64> attribute11_register; -            BitField<48, 4, u64> attribute12_register; -            BitField<52, 4, u64> attribute13_register; -            BitField<56, 4, u64> attribute14_register; -            BitField<60, 4, u64> attribute15_register; - -            int GetRegisterForAttribute(int attribute_index) const { -                u64 fields[] = { -                    attribute0_register,  attribute1_register,  attribute2_register, -                    attribute3_register,  attribute4_register,  attribute5_register, -                    attribute6_register,  attribute7_register,  attribute8_register, -                    attribute9_register,  attribute10_register, attribute11_register, -                    attribute12_register, attribute13_register, attribute14_register, -                    attribute15_register, -                }; -                return (int)fields[attribute_index]; -            } -        } input_register_map; +        /// Maps input attributes to registers. 4-bits per attribute, specifying a register index +        u32 input_attribute_to_register_map_low; +        u32 input_attribute_to_register_map_high; + +        unsigned int GetRegisterForAttribute(unsigned int attribute_index) const { +            u64 map = ((u64)input_attribute_to_register_map_high << 32) | +                      (u64)input_attribute_to_register_map_low; +            return (map >> (attribute_index * 4)) & 0b1111; +        }          BitField<0, 16, u32> output_mask; diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index e4f2e6d5d..785d05650 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -23,7 +23,7 @@ struct State {      Shader::ShaderSetup vs;      Shader::ShaderSetup gs; -    std::array<Math::Vec4<float24>, 16> vs_default_attributes; +    Shader::AttributeBuffer input_default_attributes;      struct {          union LutEntry { @@ -66,7 +66,7 @@ struct State {      /// Struct used to describe immediate mode rendering state      struct ImmediateModeState {          // Used to buffer partial vertices for immediate-mode rendering. -        Shader::InputVertex input_vertex; +        Shader::AttributeBuffer input_vertex;          // Index of the next attribute to be loaded into `input_vertex`.          u32 current_attribute = 0;      } immediate; diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index be7377290..e71ff5719 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -14,7 +14,7 @@ PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topolo      : topology(topology), buffer_index(0) {}  template <typename VertexType> -void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, +void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,                                                    TriangleHandler triangle_handler) {      switch (topology) {      // TODO: Figure out what's different with TriangleTopology::Shader. diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 0384d5984..24da47382 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -15,7 +15,8 @@ namespace Pica {   */  template <typename VertexType>  struct PrimitiveAssembler { -    using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>; +    using TriangleHandler = +        std::function<void(const VertexType& v0, const VertexType& v1, const VertexType& v2)>;      PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); @@ -25,7 +26,7 @@ struct PrimitiveAssembler {       * NOTE: We could specify the triangle handler in the constructor, but this way we can       * keep event and handler code next to each other.       */ -    void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); +    void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);      /**       * Resets the internal state of the PrimitiveAssembler. diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index c034c12d3..287d732b5 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -308,8 +308,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24   * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing   * culling via recursion.   */ -static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, -                                    const Shader::OutputVertex& v2, bool reversed = false) { +static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2, +                                    bool reversed = false) {      const auto& regs = g_state.regs;      MICROPROFILE_SCOPE(GPU_Rasterization); @@ -1277,8 +1277,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader      }  } -void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, -                     const Shader::OutputVertex& v2) { +void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) {      ProcessTriangleInternal(v0, v1, v2);  } diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h index 6cbda3067..3a72ac343 100644 --- a/src/video_core/rasterizer.h +++ b/src/video_core/rasterizer.h @@ -4,16 +4,44 @@  #pragma once -namespace Pica { +#include "video_core/shader/shader.h" -namespace Shader { -struct OutputVertex; -} +namespace Pica {  namespace Rasterizer { -void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, -                     const Shader::OutputVertex& v2); +struct Vertex : Shader::OutputVertex { +    Vertex(const OutputVertex& v) : OutputVertex(v) {} + +    // Attributes used to store intermediate results +    // position after perspective divide +    Math::Vec3<float24> screenpos; + +    // Linear interpolation +    // factor: 0=this, 1=vtx +    void Lerp(float24 factor, const Vertex& vtx) { +        pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); + +        // TODO: Should perform perspective correct interpolation here... +        tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); +        tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); +        tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); + +        screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); + +        color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); +    } + +    // Linear interpolation +    // factor: 0=v0, 1=v1 +    static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { +        Vertex ret = v0; +        ret.Lerp(factor, v1); +        return ret; +    } +}; + +void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2);  } // namespace Rasterizer diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f3674e965..071e4ace0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -467,7 +467,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {      // Fragment lighting switches      case PICA_REG_INDEX(lighting.disable): -    case PICA_REG_INDEX(lighting.num_lights): +    case PICA_REG_INDEX(lighting.max_light_index):      case PICA_REG_INDEX(lighting.config0):      case PICA_REG_INDEX(lighting.config1):      case PICA_REG_INDEX(lighting.abs_lut_input): diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index cc3e4bed5..a1aa07074 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -84,7 +84,7 @@ union PicaShaderConfig {          // Fragment lighting          state.lighting.enable = !regs.lighting.disable; -        state.lighting.src_num = regs.lighting.num_lights + 1; +        state.lighting.src_num = regs.lighting.max_light_index + 1;          for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {              unsigned num = regs.lighting.light_enable.GetNum(light_index); diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2da50bd62..f5f7ea61d 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -4,6 +4,7 @@  #include <cmath>  #include <cstring> +#include "common/bit_set.h"  #include "common/logging/log.h"  #include "common/microprofile.h"  #include "video_core/pica.h" @@ -19,38 +20,32 @@ namespace Pica {  namespace Shader { -OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, -                                         u32 output_mask) { +OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) {      // Setup output data -    OutputVertex ret; -    // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to -    // figure out what those circumstances are and enable the remaining outputs then. -    unsigned index = 0; -    for (unsigned i = 0; i < 7; ++i) { +    union { +        OutputVertex ret{}; +        std::array<float24, 24> vertex_slots; +    }; +    static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes."); -        if (index >= regs.vs_output_total) -            break; +    unsigned int num_attributes = regs.vs_output_total; +    ASSERT(num_attributes <= 7); +    for (unsigned int i = 0; i < num_attributes; ++i) { +        const auto& output_register_map = regs.vs_output_attributes[i]; -        if ((output_mask & (1 << i)) == 0) -            continue; - -        const auto& output_register_map = regs.vs_output_attributes[index]; - -        u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, -                            output_register_map.map_z, output_register_map.map_w}; +        Regs::VSOutputAttributes::Semantic semantics[4] = { +            output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, +            output_register_map.map_w};          for (unsigned comp = 0; comp < 4; ++comp) { -            float24* out = ((float24*)&ret) + semantics[comp]; -            if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { -                *out = output_regs[i][comp]; -            } else { -                // Zero output so that attributes which aren't output won't have denormals in them, -                // which would slow us down later. -                memset(out, 0, sizeof(*out)); +            Regs::VSOutputAttributes::Semantic semantic = semantics[comp]; +            float24* out = &vertex_slots[semantic]; +            if (semantic < vertex_slots.size()) { +                *out = input.attr[i][comp]; +            } else if (semantic != Regs::VSOutputAttributes::INVALID) { +                LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic);              }          } - -        index++;      }      // The hardware takes the absolute and saturates vertex colors like this, *before* doing @@ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co      return ret;  } -void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { -    // Setup input register table -    const auto& attribute_register_map = g_state.regs.vs.input_register_map; +void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) { +    const unsigned max_attribute = config.max_input_attribute_index; -    for (int i = 0; i < num_attributes; i++) -        registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; +    for (unsigned attr = 0; attr <= max_attribute; ++attr) { +        unsigned reg = config.GetRegisterForAttribute(attr); +        registers.input[reg] = input.attr[attr]; +    } +} + +void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) { +    unsigned int output_i = 0; +    for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) { +        output.attr[output_i++] = registers.output[reg]; +    }  }  MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 44d9f76c3..b188d3edf 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -23,14 +23,11 @@ namespace Pica {  namespace Shader { -struct InputVertex { +struct AttributeBuffer {      alignas(16) Math::Vec4<float24> attr[16];  };  struct OutputVertex { -    OutputVertex() = default; - -    // VS output attributes      Math::Vec4<float24> pos;      Math::Vec4<float24> quat;      Math::Vec4<float24> color; @@ -42,43 +39,22 @@ struct OutputVertex {      INSERT_PADDING_WORDS(1);      Math::Vec2<float24> tc2; -    // Padding for optimal alignment -    INSERT_PADDING_WORDS(4); - -    // Attributes used to store intermediate results - -    // position after perspective divide -    Math::Vec3<float24> screenpos; -    INSERT_PADDING_WORDS(1); - -    // Linear interpolation -    // factor: 0=this, 1=vtx -    void Lerp(float24 factor, const OutputVertex& vtx) { -        pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); - -        // TODO: Should perform perspective correct interpolation here... -        tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); -        tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); -        tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); - -        screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); - -        color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); -    } - -    // Linear interpolation -    // factor: 0=v0, 1=v1 -    static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { -        OutputVertex ret = v0; -        ret.Lerp(factor, v1); -        return ret; -    } - -    static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, -                                      u32 output_mask); +    static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output);  }; +#define ASSERT_POS(var, pos)                                                                       \ +    static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \ +                                                                        "offset.") +ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X); +ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X); +ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R); +ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U); +ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U); +ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W); +ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X); +ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U); +#undef ASSERT_POS  static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); -static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); +static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");  /**   * This structure contains the state information that needs to be unique for a shader unit. The 3DS @@ -137,10 +113,12 @@ struct UnitState {      /**       * Loads the unit state with an input vertex.       * -     * @param input Input vertex into the shader -     * @param num_attributes The number of vertex shader attributes to load +     * @param config Shader configuration registers corresponding to the unit. +     * @param input Attribute buffer to load into the input registers.       */ -    void LoadInputVertex(const InputVertex& input, int num_attributes); +    void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); + +    void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output);  };  struct ShaderSetup { diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c0c89b857..81522b8f5 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {  }  DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, -                                                    const InputVertex& input, -                                                    int num_attributes) const { +                                                    const AttributeBuffer& input, +                                                    const Regs::ShaderConfig& config) const {      UnitState state;      DebugData<true> debug_data;      // Setup input register table      boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); -    state.LoadInputVertex(input, num_attributes); +    state.LoadInput(config, input);      RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);      return debug_data;  } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d6c0e2d8c..d7a61e122 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -19,12 +19,11 @@ public:      /**       * Produce debug information based on the given shader and input vertex       * @param input Input vertex into the shader -     * @param num_attributes The number of vertex shader attributes       * @param config Configuration object for the shader pipeline       * @return Debug information for this shader with regards to the given vertex       */ -    DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, -                                     int num_attributes) const; +    DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, +                                     const Regs::ShaderConfig& config) const;  };  } // namespace diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 2b8ef7018..bf83b61ca 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -70,7 +70,8 @@ void VertexLoader::Setup(const Pica::Regs& regs) {      is_setup = true;  } -void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, +void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, +                              Shader::AttributeBuffer& input,                                DebugUtils::MemoryAccessTracker& memory_accesses) {      ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); @@ -142,7 +143,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I                        input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());          } else if (vertex_attribute_is_default[i]) {              // Load the default attribute if we're configured to do so -            input.attr[i] = g_state.vs_default_attributes[i]; +            input.attr[i] = g_state.input_default_attributes.attr[i];              LOG_TRACE(HW_GPU,                        "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,                        vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index 9f2098bb2..51f3d45b4 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h @@ -11,7 +11,7 @@ class MemoryAccessTracker;  }  namespace Shader { -struct InputVertex; +struct AttributeBuffer;  }  class VertexLoader { @@ -22,7 +22,7 @@ public:      }      void Setup(const Pica::Regs& regs); -    void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, +    void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input,                      DebugUtils::MemoryAccessTracker& memory_accesses);      int GetNumTotalAttributes() const {  | 
