diff options
14 files changed, 277 insertions, 91 deletions
| diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index bf2210899..01b77a7d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -140,7 +140,27 @@ Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin      return id;  } +u32 NumVertices(InputTopology input_topology) { +    switch (input_topology) { +    case InputTopology::Points: +        return 1; +    case InputTopology::Lines: +        return 2; +    case InputTopology::LinesAdjacency: +        return 4; +    case InputTopology::Triangles: +        return 3; +    case InputTopology::TrianglesAdjacency: +        return 6; +    } +    throw InvalidArgument("Invalid input topology {}", input_topology); +} +  Id DefineInput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) { +    if (ctx.stage == Stage::Geometry) { +        const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; +        type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); +    }      return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);  } @@ -455,12 +475,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {  void EmitContext::DefineAttributeMemAccess(const Info& info) {      const auto make_load{[&] { +        const bool is_array{stage == Stage::Geometry};          const Id end_block{OpLabel()};          const Id default_label{OpLabel()}; -        const Id func_type_load{TypeFunction(F32[1], U32[1])}; +        const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) +                                         : TypeFunction(F32[1], U32[1])};          const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};          const Id offset{OpFunctionParameter(U32[1])}; +        const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; +          AddLabel();          const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))};          const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; @@ -472,7 +496,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {              labels.push_back(OpLabel());          }          const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; -        for (u32 i = 0; i < info.input_generics.size(); i++) { +        for (u32 i = 0; i < info.input_generics.size(); ++i) {              if (!info.input_generics[i].used) {                  continue;              } @@ -486,7 +510,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {          size_t label_index{0};          if (info.loads_position) {              AddLabel(labels[label_index]); -            const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))}; +            const Id pointer{is_array +                                 ? OpAccessChain(input_f32, input_position, vertex, masked_index) +                                 : OpAccessChain(input_f32, input_position, masked_index)}; +            const Id result{OpLoad(F32[1], pointer)};              OpReturnValue(result);              ++label_index;          } @@ -502,7 +529,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {                  continue;              }              const Id generic_id{input_generics.at(i)}; -            const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)}; +            const Id pointer{is_array +                                 ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) +                                 : OpAccessChain(type->pointer, generic_id, masked_index)};              const Id value{OpLoad(type->id, pointer)};              const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};              OpReturnValue(result); @@ -910,13 +939,13 @@ void EmitContext::DefineOutputs(const Info& info) {      }      if (info.stores_point_size || profile.fixed_state_point_size) {          if (stage == Stage::Fragment) { -            throw NotImplementedException("Storing PointSize in Fragment stage"); +            throw NotImplementedException("Storing PointSize in fragment stage");          }          output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize);      }      if (info.stores_clip_distance) {          if (stage == Stage::Fragment) { -            throw NotImplementedException("Storing PointSize in Fragment stage"); +            throw NotImplementedException("Storing ClipDistance in fragment stage");          }          const Id type{TypeArray(F32[1], Constant(U32[1], 8U))};          clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); @@ -924,7 +953,7 @@ void EmitContext::DefineOutputs(const Info& info) {      if (info.stores_viewport_index &&          (profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) {          if (stage == Stage::Fragment) { -            throw NotImplementedException("Storing ViewportIndex in Fragment stage"); +            throw NotImplementedException("Storing ViewportIndex in fragment stage");          }          viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex);      } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3258b0cf8..d7c5890ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -134,6 +134,44 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {      case Shader::Stage::VertexB:          execution_model = spv::ExecutionModel::Vertex;          break; +    case Shader::Stage::Geometry: +        execution_model = spv::ExecutionModel::Geometry; +        ctx.AddCapability(spv::Capability::Geometry); +        ctx.AddCapability(spv::Capability::GeometryStreams); +        switch (ctx.profile.input_topology) { +        case InputTopology::Points: +            ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); +            break; +        case InputTopology::Lines: +            ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); +            break; +        case InputTopology::LinesAdjacency: +            ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); +            break; +        case InputTopology::Triangles: +            ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); +            break; +        case InputTopology::TrianglesAdjacency: +            ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); +            break; +        } +        switch (program.output_topology) { +        case OutputTopology::PointList: +            ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); +            break; +        case OutputTopology::LineStrip: +            ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); +            break; +        case OutputTopology::TriangleStrip: +            ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); +            break; +        } +        if (program.info.stores_point_size) { +            ctx.AddCapability(spv::Capability::GeometryPointSize); +        } +        ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); +        ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); +        break;      case Shader::Stage::Fragment:          execution_model = spv::ExecutionModel::Fragment;          ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 440075212..c0e1b8833 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -34,8 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx);  void EmitMemoryBarrierSystemLevel(EmitContext& ctx);  void EmitPrologue(EmitContext& ctx);  void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, Id stream); -void EmitEndPrimitive(EmitContext& ctx, Id stream); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);  void EmitGetRegister(EmitContext& ctx);  void EmitSetRegister(EmitContext& ctx);  void EmitGetPred(EmitContext& ctx); @@ -51,10 +51,10 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o  Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);  Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);  Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);  void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);  void EmitSetFragDepth(EmitContext& ctx, Id value);  void EmitGetZFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d552a1b52..a91b4c212 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  #include <tuple> +#include <utility>  #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -29,6 +30,15 @@ std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {      throw InvalidArgument("Invalid attribute type {}", type);  } +template <typename... Args> +Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { +    if (ctx.stage == Stage::Geometry) { +        return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); +    } else { +        return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); +    } +} +  std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {      const u32 element{static_cast<u32>(attr) % 4};      const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; @@ -66,6 +76,31 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {          throw NotImplementedException("Read attribute {}", attr);      }  } + +Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, +           const IR::Value& binding, const IR::Value& offset) { +    if (!binding.IsImmediate()) { +        throw NotImplementedException("Constant buffer indexing"); +    } +    const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; +    const Id uniform_type{ctx.uniform_types.*member_ptr}; +    if (!offset.IsImmediate()) { +        Id index{ctx.Def(offset)}; +        if (element_size > 1) { +            const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; +            const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; +            index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); +        } +        const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; +        return ctx.OpLoad(result_type, access_chain); +    } +    if (offset.U32() % element_size != 0) { +        throw NotImplementedException("Unaligned immediate constant buffer load"); +    } +    const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; +    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; +    return ctx.OpLoad(result_type, access_chain); +}  } // Anonymous namespace  void EmitGetRegister(EmitContext&) { @@ -100,31 +135,6 @@ void EmitGetIndirectBranchVariable(EmitContext&) {      throw NotImplementedException("SPIR-V Instruction");  } -static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, -                  u32 element_size, const IR::Value& binding, const IR::Value& offset) { -    if (!binding.IsImmediate()) { -        throw NotImplementedException("Constant buffer indexing"); -    } -    const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; -    const Id uniform_type{ctx.uniform_types.*member_ptr}; -    if (!offset.IsImmediate()) { -        Id index{ctx.Def(offset)}; -        if (element_size > 1) { -            const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; -            const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; -            index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); -        } -        const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; -        return ctx.OpLoad(result_type, access_chain); -    } -    if (offset.U32() % element_size != 0) { -        throw NotImplementedException("Unaligned immediate constant buffer load"); -    } -    const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; -    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; -    return ctx.OpLoad(result_type, access_chain); -} -  Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {      const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};      return ctx.OpUConvert(ctx.U32[1], load); @@ -157,7 +167,7 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value&      return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset);  } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {      const u32 element{static_cast<u32>(attr) % 4};      const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};      if (IR::IsGeneric(attr)) { @@ -168,7 +178,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {              return ctx.Constant(ctx.F32[1], 0.0f);          }          const Id generic_id{ctx.input_generics.at(index)}; -        const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())}; +        const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())};          const Id value{ctx.OpLoad(type->id, pointer)};          return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;      } @@ -177,8 +187,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {      case IR::Attribute::PositionY:      case IR::Attribute::PositionZ:      case IR::Attribute::PositionW: -        return ctx.OpLoad(ctx.F32[1], -                          ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id())); +        return ctx.OpLoad( +            ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id()));      case IR::Attribute::InstanceId:          if (ctx.profile.support_vertex_instance_id) {              return ctx.OpLoad(ctx.U32[1], ctx.instance_id); @@ -198,29 +208,32 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {                              ctx.Constant(ctx.U32[1], std::numeric_limits<u32>::max()),                              ctx.u32_zero_value);      case IR::Attribute::PointSpriteS: -        return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, -                                                        ctx.Constant(ctx.U32[1], 0U))); +        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, +                                                  ctx.u32_zero_value));      case IR::Attribute::PointSpriteT: -        return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, -                                                        ctx.Constant(ctx.U32[1], 1U))); +        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, +                                                  ctx.Constant(ctx.U32[1], 1U)));      default:          throw NotImplementedException("Read attribute {}", attr);      }  } -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {      const std::optional<Id> output{OutputAttrPointer(ctx, attr)}; -    if (!output) { -        return; +    if (output) { +        ctx.OpStore(*output, value);      } -    ctx.OpStore(*output, value);  } -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) { -    return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { +    if (ctx.stage == Stage::Geometry) { +        return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); +    } else { +        return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); +    }  } -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) { +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) {      ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);  } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d20f4def3..6c8fcd5a5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -5,6 +5,17 @@  #include "shader_recompiler/backend/spirv/emit_spirv.h"  namespace Shader::Backend::SPIRV { +namespace { +void ConvertDepthMode(EmitContext& ctx) { +    const Id type{ctx.F32[1]}; +    const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; +    const Id z{ctx.OpCompositeExtract(type, position, 2u)}; +    const Id w{ctx.OpCompositeExtract(type, position, 3u)}; +    const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; +    const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; +    ctx.OpStore(ctx.output_position, vector); +} +} // Anonymous namespace  void EmitPrologue(EmitContext& ctx) {      if (ctx.stage == Stage::VertexB) { @@ -25,23 +36,30 @@ void EmitPrologue(EmitContext& ctx) {  }  void EmitEpilogue(EmitContext& ctx) { -    if (ctx.profile.convert_depth_mode) { -        const Id type{ctx.F32[1]}; -        const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; -        const Id z{ctx.OpCompositeExtract(type, position, 2u)}; -        const Id w{ctx.OpCompositeExtract(type, position, 3u)}; -        const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; -        const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; -        ctx.OpStore(ctx.output_position, vector); +    if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { +        ConvertDepthMode(ctx);      }  } -void EmitEmitVertex(EmitContext& ctx, Id stream) { -    ctx.OpEmitStreamVertex(stream); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { +    if (ctx.profile.convert_depth_mode) { +        ConvertDepthMode(ctx); +    } +    if (!stream.IsImmediate()) { +        // LOG_WARNING(..., "EmitVertex's stream is not constant"); +        ctx.OpEmitStreamVertex(ctx.u32_zero_value); +        return; +    } +    ctx.OpEmitStreamVertex(ctx.Def(stream));  } -void EmitEndPrimitive(EmitContext& ctx, Id stream) { -    ctx.OpEndStreamPrimitive(stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { +    if (!stream.IsImmediate()) { +        // LOG_WARNING(..., "EndPrimitive's stream is not constant"); +        ctx.OpEndStreamPrimitive(ctx.u32_zero_value); +        return; +    } +    ctx.OpEndStreamPrimitive(ctx.Def(stream));  }  } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7d48fa1ba..d66eb17a6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -308,19 +308,27 @@ U1 IREmitter::GetFlowTestResult(FlowTest test) {  }  F32 IREmitter::GetAttribute(IR::Attribute attribute) { -    return Inst<F32>(Opcode::GetAttribute, attribute); +    return GetAttribute(attribute, Imm32(0));  } -void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { -    Inst(Opcode::SetAttribute, attribute, value); +F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { +    return Inst<F32>(Opcode::GetAttribute, attribute, vertex); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { +    Inst(Opcode::SetAttribute, attribute, value, vertex);  }  F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { -    return Inst<F32>(Opcode::GetAttributeIndexed, phys_address); +    return GetAttributeIndexed(phys_address, Imm32(0)); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { +    return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);  } -void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { -    Inst(Opcode::SetAttributeIndexed, phys_address, value); +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { +    Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);  }  void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 033c4332e..e70359eb1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -77,10 +77,12 @@ public:      [[nodiscard]] U1 GetFlowTestResult(FlowTest test);      [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); -    void SetAttribute(IR::Attribute attribute, const F32& value); +    [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); +    void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);      [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); -    void SetAttributeIndexed(const U32& phys_address, const F32& value); +    [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); +    void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);      void SetFragColor(u32 index, u32 component, const F32& value);      void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0e487f1a7..7a21fe746 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -44,10 +44,10 @@ OPCODE(GetCbufS16,                                          U32,            U32,  OPCODE(GetCbufU32,                                          U32,            U32,            U32,                                                            )  OPCODE(GetCbufF32,                                          F32,            U32,            U32,                                                            )  OPCODE(GetCbufU32x2,                                        U32x2,          U32,            U32,                                                            ) -OPCODE(GetAttribute,                                        F32,            Attribute,                                                                      ) -OPCODE(SetAttribute,                                        Void,           Attribute,      F32,                                                            ) -OPCODE(GetAttributeIndexed,                                 F32,            U32,                                                                            ) -OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,                                                            ) +OPCODE(GetAttribute,                                        F32,            Attribute,      U32,                                                            ) +OPCODE(SetAttribute,                                        Void,           Attribute,      F32,            U32,                                            ) +OPCODE(GetAttributeIndexed,                                 F32,            U32,            U32,                                                            ) +OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,            U32,                                            )  OPCODE(SetFragColor,                                        Void,           U32,            U32,            F32,                                            )  OPCODE(SetFragDepth,                                        Void,           F32,                                                                            )  OPCODE(GetZFlag,                                            U1,             Void,                                                                           ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 3a37b3ab9..51e1a8c77 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@  #include <boost/container/small_vector.hpp>  #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/program_header.h"  #include "shader_recompiler/shader_info.h"  #include "shader_recompiler/stage.h" @@ -21,6 +22,9 @@ struct Program {      Info info;      Stage stage{};      std::array<u32, 3> workgroup_size{}; +    OutputTopology output_topology{}; +    u32 output_vertices{}; +    u32 invocations{};      u32 local_memory_size{};      u32 shared_memory_size{};  }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aaf2a74a7..ab67446c8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo      program.post_order_blocks = PostOrder(program.blocks);      program.stage = env.ShaderStage();      program.local_memory_size = env.LocalMemorySize(); -    if (program.stage == Stage::Compute) { +    switch (program.stage) { +    case Stage::Geometry: { +        const ProgramHeader& sph{env.SPH()}; +        program.output_topology = sph.common3.output_topology; +        program.output_vertices = sph.common4.max_output_vertices; +        program.invocations = sph.common2.threads_per_input_primitive; +        break; +    } +    case Stage::Compute:          program.workgroup_size = env.WorkgroupSize();          program.shared_memory_size = env.SharedMemorySize(); +        break; +    default: +        break;      }      RemoveUnreachableBlocks(program); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 79293bd6b..eb6a80de2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) {          BitField<8, 8, IR::Reg> index_reg;          BitField<20, 10, u64> absolute_offset;          BitField<20, 11, s64> relative_offset; -        BitField<39, 8, IR::Reg> array_reg; +        BitField<39, 8, IR::Reg> vertex_reg;          BitField<32, 1, u64> o;          BitField<31, 1, u64> patch;          BitField<47, 2, Size> size; @@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) {      if (offset % 4 != 0) {          throw NotImplementedException("Unaligned absolute offset {}", offset);      } +    const IR::U32 vertex{X(ald.vertex_reg)};      const u32 num_elements{NumElements(ald.size)};      if (ald.index_reg == IR::Reg::RZ) {          for (u32 element = 0; element < num_elements; ++element) { -            F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); +            const IR::Attribute attr{offset / 4 + element}; +            F(ald.dest_reg + element, ir.GetAttribute(attr, vertex));          }          return;      }      HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { -        F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); +        F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex));      });  } @@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) {          BitField<20, 10, u64> absolute_offset;          BitField<20, 11, s64> relative_offset;          BitField<31, 1, u64> patch; -        BitField<39, 8, IR::Reg> array_reg; +        BitField<39, 8, IR::Reg> vertex_reg;          BitField<47, 2, Size> size;      } const ast{insn}; @@ -114,15 +116,17 @@ void TranslatorVisitor::AST(u64 insn) {      if (offset % 4 != 0) {          throw NotImplementedException("Unaligned absolute offset {}", offset);      } +    const IR::U32 vertex{X(ast.vertex_reg)};      const u32 num_elements{NumElements(ast.size)};      if (ast.index_reg == IR::Reg::RZ) {          for (u32 element = 0; element < num_elements; ++element) { -            ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); +            const IR::Attribute attr{offset / 4 + element}; +            ir.SetAttribute(attr, F(ast.src_reg + element), vertex);          }          return;      }      HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { -        ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); +        ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex);      });  } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a4e41bda1..06f1f59bd 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,14 @@ enum class AttributeType : u8 {      Disabled,  }; +enum class InputTopology { +    Points, +    Lines, +    LinesAdjacency, +    Triangles, +    TrianglesAdjacency, +}; +  struct Profile {      u32 supported_spirv{0x00010000}; @@ -46,6 +54,8 @@ struct Profile {      std::array<AttributeType, 32> generic_input_types{};      bool convert_depth_mode{}; +    InputTopology input_topology{}; +      std::optional<float> fixed_state_point_size;  }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b953d694b..f49add208 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -769,7 +769,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(          const size_t stage_index{index - 1};          infos[stage_index] = &program.info; -        const Shader::Profile profile{MakeProfile(key, program.stage)}; +        const Shader::Profile profile{MakeProfile(key, program)};          const std::vector<u32> code{EmitSPIRV(profile, program, binding)};          device.SaveShader(code);          modules[stage_index] = BuildShader(device, code); @@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA  }  Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, -                                           Shader::Stage stage) { +                                           const Shader::IR::Program& program) {      Shader::Profile profile{base_profile}; -    if (stage == Shader::Stage::VertexB) { -        profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; -        if (key.state.topology == Maxwell::PrimitiveTopology::Points) { -            profile.fixed_state_point_size = Common::BitCast<float>(key.state.point_size); + +    const Shader::Stage stage{program.stage}; +    const bool has_geometry{key.unique_hashes[4] != u128{}}; +    const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; +    const float point_size{Common::BitCast<float>(key.state.point_size)}; +    switch (stage) { +    case Shader::Stage::VertexB: +        if (!has_geometry) { +            if (key.state.topology == Maxwell::PrimitiveTopology::Points) { +                profile.fixed_state_point_size = point_size; +            } +            profile.convert_depth_mode = gl_ndc;          }          std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(),                                 &CastAttributeType); +        break; +    case Shader::Stage::Geometry: +        if (program.output_topology == Shader::OutputTopology::PointList) { +            profile.fixed_state_point_size = point_size; +        } +        profile.convert_depth_mode = gl_ndc; +        break; +    default: +        break; +    } +    switch (key.state.topology) { +    case Maxwell::PrimitiveTopology::Points: +        profile.input_topology = Shader::InputTopology::Points; +        break; +    case Maxwell::PrimitiveTopology::Lines: +    case Maxwell::PrimitiveTopology::LineLoop: +    case Maxwell::PrimitiveTopology::LineStrip: +        profile.input_topology = Shader::InputTopology::Lines; +        break; +    case Maxwell::PrimitiveTopology::Triangles: +    case Maxwell::PrimitiveTopology::TriangleStrip: +    case Maxwell::PrimitiveTopology::TriangleFan: +    case Maxwell::PrimitiveTopology::Quads: +    case Maxwell::PrimitiveTopology::QuadStrip: +    case Maxwell::PrimitiveTopology::Polygon: +    case Maxwell::PrimitiveTopology::Patches: +        profile.input_topology = Shader::InputTopology::Triangles; +        break; +    case Maxwell::PrimitiveTopology::LinesAdjacency: +    case Maxwell::PrimitiveTopology::LineStripAdjacency: +        profile.input_topology = Shader::InputTopology::LinesAdjacency; +        break; +    case Maxwell::PrimitiveTopology::TrianglesAdjacency: +    case Maxwell::PrimitiveTopology::TriangleStripAdjacency: +        profile.input_topology = Shader::InputTopology::TrianglesAdjacency; +        break;      }      return profile;  } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 343ea1554..8b6839966 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -33,6 +33,10 @@ namespace Core {  class System;  } +namespace Shader::IR { +struct Program; +} +  namespace Vulkan {  using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -160,7 +164,8 @@ private:                                                             Shader::Environment& env,                                                             bool build_in_parallel); -    Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); +    Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, +                                const Shader::IR::Program& program);      Tegra::GPU& gpu;      Tegra::Engines::Maxwell3D& maxwell3d; | 
