diff options
| field | value | date |
|---|---|---|
| author | Liam <byteslice@airmail.cc> | 2022-11-30 17:16:00 -0500 |
| committer | Liam <byteslice@airmail.cc> | 2022-12-01 09:51:27 -0500 |
| commit | 3ef006b5abbe78bb2ae423a7cab74d7da2f8bc08 (patch) | |
| tree | bf0c9ed637dc39faea655fb836b7a48a9f5d28ee | |
| parent | d6b63239ae5011cbe8aa59b44bfd813e30f75d56 (diff) | |
shader_recompiler: add gl_Layer translation GS for older hardware
| -rw-r--r-- | src/shader_recompiler/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 81 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.h | 9 | ||||
| -rw-r--r-- | src/shader_recompiler/host_translate_info.h | 3 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/layer_pass.cpp | 68 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/passes.h | 1 | ||||
| -rw-r--r-- | src/shader_recompiler/shader_info.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 33 | 
9 files changed, 230 insertions, 6 deletions
| diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 545d69c7e..8cd584154 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC      ir_opt/dual_vertex_pass.cpp      ir_opt/global_memory_to_storage_buffer_pass.cpp      ir_opt/identity_removal_pass.cpp +    ir_opt/layer_pass.cpp      ir_opt/lower_fp16_to_fp32.cpp      ir_opt/lower_int64_to_int32.cpp      ir_opt/passes.h diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 376aae0ea..3adbd2b16 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -9,6 +9,7 @@  #include "common/settings.h"  #include "shader_recompiler/exception.h"  #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h"  #include "shader_recompiler/frontend/ir/post_order.h"  #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"  #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -233,6 +234,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo          Optimization::VerificationPass(program);      }      Optimization::CollectShaderInfoPass(env, program); +    Optimization::LayerPass(program, host_info); +      CollectInterpolationInfo(env, program);      AddNVNStorageBuffers(program);      return program; @@ -331,4 +334,82 @@ void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& run      }  } +IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, +                                        ObjectPool<IR::Block>& block_pool, +                                        const HostTranslateInfo& host_info, +                                        IR::Program& source_program, +         
                               Shader::OutputTopology output_topology) { +    IR::Program program; +    program.stage = Stage::Geometry; +    program.output_topology = output_topology; +    switch (output_topology) { +    case OutputTopology::PointList: +        program.output_vertices = 1; +        break; +    case OutputTopology::LineStrip: +        program.output_vertices = 2; +        break; +    default: +        program.output_vertices = 3; +        break; +    } + +    program.is_geometry_passthrough = false; +    program.info.loads.mask = source_program.info.stores.mask; +    program.info.stores.mask = source_program.info.stores.mask; +    program.info.stores.Set(IR::Attribute::Layer, true); +    program.info.stores.Set(source_program.info.emulated_layer, false); + +    IR::Block* current_block = block_pool.Create(inst_pool); +    auto& node{program.syntax_list.emplace_back()}; +    node.type = IR::AbstractSyntaxNode::Type::Block; +    node.data.block = current_block; + +    IR::IREmitter ir{*current_block}; +    for (u32 i = 0; i < program.output_vertices; i++) { +        // Assign generics from input +        for (u32 j = 0; j < 32; j++) { +            if (!program.info.stores.Generic(j)) { +                continue; +            } + +            const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); +            ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); +            ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); +            ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); +            ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); +        } + +        // Assign position from input +        const IR::Attribute attr = IR::Attribute::PositionX; +        ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); +        ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), 
ir.Imm32(0)); +        ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); +        ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + +        // Assign layer +        ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer), +                        ir.Imm32(0)); + +        // Emit vertex +        ir.EmitVertex(ir.Imm32(0)); +    } +    ir.EndPrimitive(ir.Imm32(0)); + +    IR::Block* return_block{block_pool.Create(inst_pool)}; +    IR::IREmitter{*return_block}.Epilogue(); +    current_block->AddBranch(return_block); + +    auto& merge{program.syntax_list.emplace_back()}; +    merge.type = IR::AbstractSyntaxNode::Type::Block; +    merge.data.block = return_block; +    program.syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; + +    program.blocks = GenerateBlocks(program.syntax_list); +    program.post_order_blocks = PostOrder(program.syntax_list.front()); +    Optimization::SsaRewritePass(program); + +    return program; +} +  } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h index 02ede8c9c..497afe7cb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.h +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -25,4 +25,13 @@ namespace Shader::Maxwell {  void ConvertLegacyToGeneric(IR::Program& program, const RuntimeInfo& runtime_info); +// Maxwell v1 and older Nvidia cards don't support setting gl_Layer from non-geometry stages. +// This creates a workaround by setting the layer as a generic output and creating a +// passthrough geometry shader that reads the generic and sets the layer. 
+[[nodiscard]] IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, +                                                      ObjectPool<IR::Block>& block_pool, +                                                      const HostTranslateInfo& host_info, +                                                      IR::Program& source_program, +                                                      Shader::OutputTopology output_topology); +  } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index cc1500690..d5d279554 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -13,7 +13,8 @@ struct HostTranslateInfo {      bool support_float16{};      ///< True when the device supports 16-bit floats      bool support_int64{};        ///< True when the device supports 64-bit integers      bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered -    bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers +    bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers +    bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS  };  } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/layer_pass.cpp b/src/shader_recompiler/ir_opt/layer_pass.cpp new file mode 100644 index 000000000..4574f7cf2 --- /dev/null +++ b/src/shader_recompiler/ir_opt/layer_pass.cpp @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> +#include <bit> +#include <optional> + +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" +#include 
"shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { + +static IR::Attribute EmulatedLayerAttribute(VaryingState& stores) { +    for (u32 i = 0; i < 32; i++) { +        if (!stores.Generic(i)) { +            return IR::Attribute::Generic0X + (i * 4); +        } +    } +    return IR::Attribute::Layer; +} + +static bool PermittedProgramStage(Stage stage) { +    switch (stage) { +    case Stage::VertexA: +    case Stage::VertexB: +    case Stage::TessellationControl: +    case Stage::TessellationEval: +        return true; +    default: +        return false; +    } +} + +void LayerPass(IR::Program& program, const HostTranslateInfo& host_info) { +    if (host_info.support_viewport_index_layer || !PermittedProgramStage(program.stage)) { +        return; +    } + +    const auto end{program.post_order_blocks.end()}; +    const auto layer_attribute = EmulatedLayerAttribute(program.info.stores); +    bool requires_layer_emulation = false; + +    for (auto block = program.post_order_blocks.begin(); block != end; ++block) { +        for (IR::Inst& inst : (*block)->Instructions()) { +            if (inst.GetOpcode() == IR::Opcode::SetAttribute && +                inst.Arg(0).Attribute() == IR::Attribute::Layer) { +                requires_layer_emulation = true; +                inst.SetArg(0, IR::Value{layer_attribute}); +            } +        } +    } + +    if (requires_layer_emulation) { +        program.info.requires_layer_emulation = true; +        program.info.emulated_layer = layer_attribute; +        program.info.stores.Set(IR::Attribute::Layer, false); +        program.info.stores.Set(layer_attribute, true); +    } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 586a0668f..11bfe801a 100644 --- 
a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -23,6 +23,7 @@ void RescalingPass(IR::Program& program);  void SsaRewritePass(IR::Program& program);  void PositionPass(Environment& env, IR::Program& program);  void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); +void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);  void VerificationPass(const IR::Program& program);  // Dual Vertex diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index ee6252bb5..d9c6e92db 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -204,6 +204,9 @@ struct Info {      u32 nvn_buffer_base{};      std::bitset<16> nvn_buffer_used{}; +    bool requires_layer_emulation{}; +    IR::Attribute emulated_layer{}; +      boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>          constant_buffer_descriptors;      boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3fe04a115..a38060100 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -39,6 +39,7 @@ using Shader::Backend::GLASM::EmitGLASM;  using Shader::Backend::GLSL::EmitGLSL;  using Shader::Backend::SPIRV::EmitSPIRV;  using Shader::Maxwell::ConvertLegacyToGeneric; +using Shader::Maxwell::GenerateGeometryPassthrough;  using Shader::Maxwell::MergeDualVertexPrograms;  using Shader::Maxwell::TranslateProgram;  using VideoCommon::ComputeEnvironment; @@ -56,6 +57,17 @@ auto MakeSpan(Container& container) {      return std::span(container.data(), container.size());  } +Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) { +    switch (topology) { +    case Maxwell::PrimitiveTopology::Points: 
+        return Shader::OutputTopology::PointList; +    case Maxwell::PrimitiveTopology::LineStrip: +        return Shader::OutputTopology::LineStrip; +    default: +        return Shader::OutputTopology::TriangleStrip; +    } +} +  Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,                                      const Shader::IR::Program& program,                                      const Shader::IR::Program* previous_program, @@ -220,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo            .support_int64 = device.HasShaderInt64(),            .needs_demote_reorder = device.IsAmd(),            .support_snorm_render_buffer = false, +          .support_viewport_index_layer = device.HasVertexViewportLayer(),        } {      if (use_asynchronous_shaders) {          workers = CreateWorkers(); @@ -314,9 +327,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {      const auto& regs{maxwell3d->regs};      graphics_key.raw = 0;      graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0); -    graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 -                                              ? 
regs.draw.topology.Value() -                                              : Maxwell::PrimitiveTopology{}); +    graphics_key.gs_input_topology.Assign(regs.draw.topology.Value());      graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value());      graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value());      graphics_key.tessellation_clockwise.Assign( @@ -415,7 +426,19 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(      std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;      const bool uses_vertex_a{key.unique_hashes[0] != 0};      const bool uses_vertex_b{key.unique_hashes[1] != 0}; + +    // Layer passthrough generation for devices without GL_ARB_shader_viewport_layer_array +    Shader::IR::Program* layer_source_program{}; +      for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { +        const bool is_emulated_stage = layer_source_program != nullptr && +                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry); +        if (key.unique_hashes[index] == 0 && is_emulated_stage) { +            auto topology = MaxwellToOutputTopology(key.gs_input_topology); +            programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, +                                                          *layer_source_program, topology); +            continue; +        }          if (key.unique_hashes[index] == 0) {              continue;          } @@ -443,6 +466,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(                  Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);              programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);          } + +        if (programs[index].info.requires_layer_emulation) { +            layer_source_program = &programs[index]; +        }      }      const u32 
glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};      const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; @@ -456,7 +483,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(      const bool use_glasm{device.UseAssemblyShaders()};      const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;      for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { -        if (key.unique_hashes[index] == 0) { +        const bool is_emulated_stage = layer_source_program != nullptr && +                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry); +        if (key.unique_hashes[index] == 0 && !is_emulated_stage) {              continue;          }          UNIMPLEMENTED_IF(index == 0); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d4b0a542a..150413b04 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -46,6 +46,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);  namespace {  using Shader::Backend::SPIRV::EmitSPIRV;  using Shader::Maxwell::ConvertLegacyToGeneric; +using Shader::Maxwell::GenerateGeometryPassthrough;  using Shader::Maxwell::MergeDualVertexPrograms;  using Shader::Maxwell::TranslateProgram;  using VideoCommon::ComputeEnvironment; @@ -60,6 +61,17 @@ auto MakeSpan(Container& container) {      return std::span(container.data(), container.size());  } +Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) { +    switch (topology) { +    case Maxwell::PrimitiveTopology::Points: +        return Shader::OutputTopology::PointList; +    case Maxwell::PrimitiveTopology::LineStrip: +        return Shader::OutputTopology::LineStrip; +    default: +        return Shader::OutputTopology::TriangleStrip; +    } +} +  Shader::CompareFunction 
MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {      switch (comparison) {      case Maxwell::ComparisonOp::Never_D3D: @@ -327,6 +339,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device          .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||                                  driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,          .support_snorm_render_buffer = true, +        .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),      };  } @@ -509,7 +522,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(      std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;      const bool uses_vertex_a{key.unique_hashes[0] != 0};      const bool uses_vertex_b{key.unique_hashes[1] != 0}; + +    // Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer +    Shader::IR::Program* layer_source_program{}; +      for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { +        const bool is_emulated_stage = layer_source_program != nullptr && +                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry); +        if (key.unique_hashes[index] == 0 && is_emulated_stage) { +            auto topology = MaxwellToOutputTopology(key.state.topology); +            programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, +                                                          *layer_source_program, topology); +            continue; +        }          if (key.unique_hashes[index] == 0) {              continue;          } @@ -530,6 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(              auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};              programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);          } + +        if 
(programs[index].info.requires_layer_emulation) { +            layer_source_program = &programs[index]; +        }      }      std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};      std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; @@ -538,7 +567,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(      Shader::Backend::Bindings binding;      for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;           ++index) { -        if (key.unique_hashes[index] == 0) { +        const bool is_emulated_stage = layer_source_program != nullptr && +                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry); +        if (key.unique_hashes[index] == 0 && !is_emulated_stage) {              continue;          }          UNIMPLEMENTED_IF(index == 0); | 
