Diffstat (limited to 'src/shader_recompiler')
-rw-r--r-- | src/shader_recompiler/CMakeLists.txt                               |   2
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp          |  10
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp |  44
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp            |  17
-rw-r--r-- | src/shader_recompiler/backend/spirv/spirv_emit_context.cpp         |  61
-rw-r--r-- | src/shader_recompiler/backend/spirv/spirv_emit_context.h           |  16
-rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp       |   6
-rw-r--r-- | src/shader_recompiler/host_translate_info.h                        |   3
-rw-r--r-- | src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp          |  44
-rw-r--r-- | src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp                | 185
-rw-r--r-- | src/shader_recompiler/ir_opt/passes.h                              |   2
-rw-r--r-- | src/shader_recompiler/profile.h                                    |   3
-rw-r--r-- | src/shader_recompiler/runtime_info.h                               |   2
13 files changed, 345 insertions, 50 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 525b2363c..07e75f9d8 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate_program.h
     host_translate_info.h
     ir_opt/collect_shader_info_pass.cpp
+    ir_opt/conditional_barrier_pass.cpp
     ir_opt/constant_propagation_pass.cpp
     ir_opt/dead_code_elimination_pass.cpp
     ir_opt/dual_vertex_pass.cpp
@@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC
     ir_opt/identity_removal_pass.cpp
     ir_opt/layer_pass.cpp
     ir_opt/lower_fp16_to_fp32.cpp
+    ir_opt/lower_fp64_to_fp32.cpp
     ir_opt/lower_int64_to_int32.cpp
     ir_opt/passes.h
     ir_opt/position_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 4b3043b65..0ce73f289 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -69,6 +69,11 @@ Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
 Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
                     Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
                     Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+    if (!ctx.profile.support_descriptor_aliasing) {
+        LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic.");
+        return ctx.ConstantNull(ctx.U64);
+    }
+
     if (ctx.profile.support_int64_atomics) {
         const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
                                         binding, offset, sizeof(u64))};
@@ -86,6 +91,11 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
 
 Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                       Id value, Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+    if (!ctx.profile.support_descriptor_aliasing) {
+        LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic.");
+        return ctx.ConstantNull(ctx.U32[2]);
+    }
+
     LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
     const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                     binding, offset, sizeof(u32[2]))};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 07c2b7b8a..2868fc57d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -10,27 +10,6 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
-struct AttrInfo {
-    Id pointer;
-    Id id;
-    bool needs_cast;
-};
-
-std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
-    const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
-    switch (type) {
-    case AttributeType::Float:
-        return AttrInfo{ctx.input_f32, ctx.F32[1], false};
-    case AttributeType::UnsignedInt:
-        return AttrInfo{ctx.input_u32, ctx.U32[1], true};
-    case AttributeType::SignedInt:
-        return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
-    case AttributeType::Disabled:
-        return std::nullopt;
-    }
-    throw InvalidArgument("Invalid attribute type {}", type);
-}
-
 template <typename... Args>
 Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
     switch (ctx.stage) {
@@ -302,15 +281,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     const u32 element{static_cast<u32>(attr) % 4};
     if (IR::IsGeneric(attr)) {
         const u32 index{IR::GenericAttributeIndex(attr)};
-        const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
-        if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+        const auto& generic{ctx.input_generics.at(index)};
+        if (!ValidId(generic.id)) {
             // Attribute is disabled or varying component is not written
             return ctx.Const(element == 3 ? 1.0f : 0.0f);
         }
-        const Id generic_id{ctx.input_generics.at(index)};
-        const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
-        const Id value{ctx.OpLoad(type->id, pointer)};
-        return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
+        const Id pointer{
+            AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(element))};
+        const Id value{ctx.OpLoad(generic.component_type, pointer)};
+        return [&ctx, generic, value]() {
+            switch (generic.load_op) {
+            case InputGenericLoadOp::Bitcast:
+                return ctx.OpBitcast(ctx.F32[1], value);
+            case InputGenericLoadOp::SToF:
+                return ctx.OpConvertSToF(ctx.F32[1], value);
+            case InputGenericLoadOp::UToF:
+                return ctx.OpConvertUToF(ctx.F32[1], value);
+            default:
+                return value;
+            };
+        }();
     }
     switch (attr) {
     case IR::Attribute::PrimitiveId:
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c5db19d09..77ff8c573 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) {
 Id WarpExtract(EmitContext& ctx, Id value) {
     const Id thread_id{GetThreadId(ctx)};
     const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
-    return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+    if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) {
+        const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))};
+        const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))};
+        const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))};
+        const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))};
+        const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)};
+        const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)};
+        const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)};
+        return c0_or_c1_or_c2_or_c3;
+    } else {
+        return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+    }
 }
 
 Id LoadMask(EmitContext& ctx, Id mask) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 47739794f..fd15f47ea 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -25,12 +25,6 @@ enum class Operation {
     FPMax,
 };
 
-struct AttrInfo {
-    Id pointer;
-    Id id;
-    bool needs_cast;
-};
-
 Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
     const spv::ImageFormat format{spv::ImageFormat::Unknown};
     const Id type{ctx.F32[1]};
@@ -206,23 +200,37 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) {
         return ctx.TypeVector(ctx.TypeInt(32, true), 4);
     case AttributeType::UnsignedInt:
         return ctx.U32[4];
+    case AttributeType::SignedScaled:
+        return ctx.profile.support_scaled_attributes ? ctx.F32[4]
+                                                     : ctx.TypeVector(ctx.TypeInt(32, true), 4);
+    case AttributeType::UnsignedScaled:
+        return ctx.profile.support_scaled_attributes ? ctx.F32[4] : ctx.U32[4];
     case AttributeType::Disabled:
         break;
     }
     throw InvalidArgument("Invalid attribute type {}", type);
 }
 
-std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
-    const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+InputGenericInfo GetAttributeInfo(EmitContext& ctx, AttributeType type, Id id) {
     switch (type) {
     case AttributeType::Float:
-        return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+        return InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None};
     case AttributeType::UnsignedInt:
-        return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+        return InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::Bitcast};
     case AttributeType::SignedInt:
-        return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+        return InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true),
+                                InputGenericLoadOp::Bitcast};
+    case AttributeType::SignedScaled:
+        return ctx.profile.support_scaled_attributes
+                   ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
+                   : InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true),
+                                      InputGenericLoadOp::SToF};
+    case AttributeType::UnsignedScaled:
+        return ctx.profile.support_scaled_attributes
+                   ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
+                   : InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::UToF};
     case AttributeType::Disabled:
-        return std::nullopt;
+        return InputGenericInfo{};
     }
     throw InvalidArgument("Invalid attribute type {}", type);
 }
@@ -746,18 +754,29 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
                 continue;
             }
             AddLabel(labels[label_index]);
-            const auto type{AttrTypes(*this, static_cast<u32>(index))};
-            if (!type) {
+            const auto& generic{input_generics.at(index)};
+            const Id generic_id{generic.id};
+            if (!ValidId(generic_id)) {
                 OpReturnValue(Const(0.0f));
                 ++label_index;
                 continue;
             }
-            const Id generic_id{input_generics.at(index)};
-            const Id pointer{is_array
-                                 ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
-                                 : OpAccessChain(type->pointer, generic_id, masked_index)};
-            const Id value{OpLoad(type->id, pointer)};
-            const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
+            const Id pointer{
+                is_array ? OpAccessChain(generic.pointer_type, generic_id, vertex, masked_index)
+                         : OpAccessChain(generic.pointer_type, generic_id, masked_index)};
+            const Id value{OpLoad(generic.component_type, pointer)};
+            const Id result{[this, generic, value]() {
+                switch (generic.load_op) {
+                case InputGenericLoadOp::Bitcast:
+                    return OpBitcast(F32[1], value);
+                case InputGenericLoadOp::SToF:
+                    return OpConvertSToF(F32[1], value);
+                case InputGenericLoadOp::UToF:
+                    return OpConvertUToF(F32[1], value);
+                default:
+                    return value;
+                };
+            }()};
             OpReturnValue(result);
             ++label_index;
         }
@@ -1457,7 +1476,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         const Id id{DefineInput(*this, type, true)};
         Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
         Name(id, fmt::format("in_attr{}", index));
-        input_generics[index] = id;
+        input_generics[index] = GetAttributeInfo(*this, input_type, id);
         if (info.passthrough.Generic(index) &&
             profile.support_geometry_shader_passthrough) {
             Decorate(id, spv::Decoration::PassthroughNV);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 768a4fbb5..e63330f11 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -95,6 +95,20 @@ struct StorageDefinitions {
     Id U32x4{};
 };
 
+enum class InputGenericLoadOp {
+    None,
+    Bitcast,
+    SToF,
+    UToF,
+};
+
+struct InputGenericInfo {
+    Id id;
+    Id pointer_type;
+    Id component_type;
+    InputGenericLoadOp load_op;
+};
+
 struct GenericElementInfo {
     Id id{};
     u32 first_element{};
@@ -283,7 +297,7 @@ public:
     bool need_input_position_indirect{};
     Id input_position{};
-    std::array<Id, 32> input_generics{};
+    std::array<InputGenericInfo, 32> input_generics{};
     Id output_point_size{};
     Id output_position{};
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 17a6d4888..928b35561 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -280,12 +280,18 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     RemoveUnreachableBlocks(program);
 
     // Replace instructions before the SSA rewrite
+    if (!host_info.support_float64) {
+        Optimization::LowerFp64ToFp32(program);
+    }
     if (!host_info.support_float16) {
         Optimization::LowerFp16ToFp32(program);
     }
     if (!host_info.support_int64) {
         Optimization::LowerInt64ToInt32(program);
     }
+    if (!host_info.support_conditional_barrier) {
+        Optimization::ConditionalBarrierPass(program);
+    }
     Optimization::SsaRewritePass(program);
     Optimization::ConstantPropagationPass(env, program);
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 2aaa6c5ea..7d2ded907 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -10,6 +10,7 @@ namespace Shader {
 
 /// Misc information about the host
 struct HostTranslateInfo {
+    bool support_float64{};      ///< True when the device supports 64-bit floats
     bool support_float16{};      ///< True when the device supports 16-bit floats
     bool support_int64{};        ///< True when the device supports 64-bit integers
    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
@@ -17,6 +18,8 @@ struct HostTranslateInfo {
     bool support_viewport_index_layer{};        ///< True when the device supports gl_Layer in VS
     bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
                                                 ///< passthrough shaders
+    bool support_conditional_barrier{};         ///< True when the device supports barriers in
+                                                ///< conditional control flow
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
new file mode 100644
index 000000000..c3ed27f4f
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void ConditionalBarrierPass(IR::Program& program) {
+    s32 conditional_control_flow_count{0};
+    s32 conditional_return_count{0};
+    for (IR::AbstractSyntaxNode& node : program.syntax_list) {
+        switch (node.type) {
+        case IR::AbstractSyntaxNode::Type::If:
+        case IR::AbstractSyntaxNode::Type::Loop:
+            conditional_control_flow_count++;
+            break;
+        case IR::AbstractSyntaxNode::Type::EndIf:
+        case IR::AbstractSyntaxNode::Type::Repeat:
+            conditional_control_flow_count--;
+            break;
+        case IR::AbstractSyntaxNode::Type::Unreachable:
+        case IR::AbstractSyntaxNode::Type::Return:
+            if (conditional_control_flow_count > 0) {
+                conditional_return_count++;
+            }
+            break;
+        case IR::AbstractSyntaxNode::Type::Block:
+            for (IR::Inst& inst : node.data.block->Instructions()) {
+                if ((conditional_control_flow_count > 0 || conditional_return_count > 0) &&
+                    inst.GetOpcode() == IR::Opcode::Barrier) {
+                    LOG_WARNING(Shader, "Barrier within conditional control flow");
+                    inst.ReplaceOpcode(IR::Opcode::Identity);
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    ASSERT(conditional_control_flow_count == 0);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
new file mode 100644
index 000000000..5db7a38ad
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
@@ -0,0 +1,185 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+
+constexpr s32 F64ToF32Exp = +127 - 1023;
+constexpr s32 F32ToF64Exp = +1023 - 127;
+
+IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
+    const IR::U32 lo{ir.CompositeExtract(packed, 0)};
+    const IR::U32 hi{ir.CompositeExtract(packed, 1)};
+    const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))};
+    const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))};
+    const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))};
+    const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))};
+    const IR::U32 mantissa{
+        ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)};
+    const IR::U32 exp_if_subnorm{
+        ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))};
+    const IR::U32 exp_if_infnan{
+        ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)};
+    const IR::U32 result{
+        ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
+                     ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))};
+    return ir.BitCast<IR::F32>(result);
+}
+
+IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
+    const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))};
+    const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))};
+    const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))};
+    const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))};
+    const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))};
+    const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))};
+    const IR::U32 exp_if_subnorm{
+        ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))};
+    const IR::U32 exp_if_infnan{
+        ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)};
+    const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))};
+    const IR::U32 hi{
+        ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
+                     ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))};
+    return ir.CompositeConstruct(lo, hi);
+}
+
+IR::Opcode Replace(IR::Opcode op) {
+    switch (op) {
+    case IR::Opcode::FPAbs64:
+        return IR::Opcode::FPAbs32;
+    case IR::Opcode::FPAdd64:
+        return IR::Opcode::FPAdd32;
+    case IR::Opcode::FPCeil64:
+        return IR::Opcode::FPCeil32;
+    case IR::Opcode::FPFloor64:
+        return IR::Opcode::FPFloor32;
+    case IR::Opcode::FPFma64:
+        return IR::Opcode::FPFma32;
+    case IR::Opcode::FPMul64:
+        return IR::Opcode::FPMul32;
+    case IR::Opcode::FPNeg64:
+        return IR::Opcode::FPNeg32;
+    case IR::Opcode::FPRoundEven64:
+        return IR::Opcode::FPRoundEven32;
+    case IR::Opcode::FPSaturate64:
+        return IR::Opcode::FPSaturate32;
+    case IR::Opcode::FPClamp64:
+        return IR::Opcode::FPClamp32;
+    case IR::Opcode::FPTrunc64:
+        return IR::Opcode::FPTrunc32;
+    case IR::Opcode::CompositeConstructF64x2:
+        return IR::Opcode::CompositeConstructF32x2;
+    case IR::Opcode::CompositeConstructF64x3:
+        return IR::Opcode::CompositeConstructF32x3;
+    case IR::Opcode::CompositeConstructF64x4:
+        return IR::Opcode::CompositeConstructF32x4;
+    case IR::Opcode::CompositeExtractF64x2:
+        return IR::Opcode::CompositeExtractF32x2;
+    case IR::Opcode::CompositeExtractF64x3:
+        return IR::Opcode::CompositeExtractF32x3;
+    case IR::Opcode::CompositeExtractF64x4:
+        return IR::Opcode::CompositeExtractF32x4;
+    case IR::Opcode::CompositeInsertF64x2:
+        return IR::Opcode::CompositeInsertF32x2;
+    case IR::Opcode::CompositeInsertF64x3:
+        return IR::Opcode::CompositeInsertF32x3;
+    case IR::Opcode::CompositeInsertF64x4:
+        return IR::Opcode::CompositeInsertF32x4;
+    case IR::Opcode::FPOrdEqual64:
+        return IR::Opcode::FPOrdEqual32;
+    case IR::Opcode::FPUnordEqual64:
+        return IR::Opcode::FPUnordEqual32;
+    case IR::Opcode::FPOrdNotEqual64:
+        return IR::Opcode::FPOrdNotEqual32;
+    case IR::Opcode::FPUnordNotEqual64:
+        return IR::Opcode::FPUnordNotEqual32;
+    case IR::Opcode::FPOrdLessThan64:
+        return IR::Opcode::FPOrdLessThan32;
+    case IR::Opcode::FPUnordLessThan64:
+        return IR::Opcode::FPUnordLessThan32;
+    case IR::Opcode::FPOrdGreaterThan64:
+        return IR::Opcode::FPOrdGreaterThan32;
+    case IR::Opcode::FPUnordGreaterThan64:
+        return IR::Opcode::FPUnordGreaterThan32;
+    case IR::Opcode::FPOrdLessThanEqual64:
+        return IR::Opcode::FPOrdLessThanEqual32;
+    case IR::Opcode::FPUnordLessThanEqual64:
+        return IR::Opcode::FPUnordLessThanEqual32;
+    case IR::Opcode::FPOrdGreaterThanEqual64:
+        return IR::Opcode::FPOrdGreaterThanEqual32;
+    case IR::Opcode::FPUnordGreaterThanEqual64:
+        return IR::Opcode::FPUnordGreaterThanEqual32;
+    case IR::Opcode::FPIsNan64:
+        return IR::Opcode::FPIsNan32;
+    case IR::Opcode::ConvertS16F64:
+        return IR::Opcode::ConvertS16F32;
+    case IR::Opcode::ConvertS32F64:
+        return IR::Opcode::ConvertS32F32;
+    case IR::Opcode::ConvertS64F64:
+        return IR::Opcode::ConvertS64F32;
+    case IR::Opcode::ConvertU16F64:
+        return IR::Opcode::ConvertU16F32;
+    case IR::Opcode::ConvertU32F64:
+        return IR::Opcode::ConvertU32F32;
+    case IR::Opcode::ConvertU64F64:
+        return IR::Opcode::ConvertU64F32;
+    case IR::Opcode::ConvertF32F64:
+        return IR::Opcode::Identity;
+    case IR::Opcode::ConvertF64F32:
+        return IR::Opcode::Identity;
+    case IR::Opcode::ConvertF64S8:
+        return IR::Opcode::ConvertF32S8;
+    case IR::Opcode::ConvertF64S16:
+        return IR::Opcode::ConvertF32S16;
+    case IR::Opcode::ConvertF64S32:
+        return IR::Opcode::ConvertF32S32;
+    case IR::Opcode::ConvertF64S64:
+        return IR::Opcode::ConvertF32S64;
+    case IR::Opcode::ConvertF64U8:
+        return IR::Opcode::ConvertF32U8;
+    case IR::Opcode::ConvertF64U16:
+        return IR::Opcode::ConvertF32U16;
+    case IR::Opcode::ConvertF64U32:
+        return IR::Opcode::ConvertF32U32;
+    case IR::Opcode::ConvertF64U64:
+        return IR::Opcode::ConvertF32U64;
+    default:
+        return op;
+    }
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::PackDouble2x32: {
+        IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+        inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
+        break;
+    }
+    case IR::Opcode::UnpackDouble2x32: {
+        IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+        inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
+        break;
+    }
+    default:
+        inst.ReplaceOpcode(Replace(inst.GetOpcode()));
+        break;
+    }
+}
+
+} // Anonymous namespace
+
+void LowerFp64ToFp32(IR::Program& program) {
+    for (IR::Block* const block : program.blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            Lower(*block, inst);
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..629d18fa1 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,10 +13,12 @@ struct HostTranslateInfo;
 namespace Shader::Optimization {
 
 void CollectShaderInfoPass(Environment& env, IR::Program& program);
+void ConditionalBarrierPass(IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
 void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Program& program);
+void LowerFp64ToFp32(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
 void LowerInt64ToInt32(IR::Program& program);
 void RescalingPass(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9f88fb440..9ca97f6a4 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
     bool support_gl_variable_aoffi{};
     bool support_gl_sparse_textures{};
     bool support_gl_derivative_control{};
+    bool support_scaled_attributes{};
 
     bool warp_size_potentially_larger_than_guest{};
 
@@ -77,6 +78,8 @@ struct Profile {
     bool has_gl_bool_ref_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
+    /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
+    bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
 
     u32 gl_max_compute_smem_size{};
 };
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 549b81ef7..3b63c249f 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -17,6 +17,8 @@ enum class AttributeType : u8 {
     Float,
     SignedInt,
     UnsignedInt,
+    SignedScaled,
+    UnsignedScaled,
     Disabled,
 };
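
Note on the WarpExtract change: on the affected drivers, OpVectorExtractDynamic on the four-word subgroup mask misbehaves, so the workaround compares local_index against each constant lane, selects either that word or zero, and ORs the four results together; at most one comparison is true, so the OR reduces to the addressed word. A scalar C++ sketch of the same trick, an illustration rather than code from the commit:

    #include <cstdint>

    // Branchless mirror of the c0_sel..c3_sel chain emitted when
    // has_broken_spirv_subgroup_mask_vector_extract_dynamic is set:
    // select each word only when its index matches, then OR them all.
    uint32_t ExtractMaskWord(const uint32_t mask[4], uint32_t local_index) {
        uint32_t result = 0;
        for (uint32_t i = 0; i < 4; ++i) {
            result |= (local_index == i) ? mask[i] : 0u;
        }
        return result;
    }

local_index is the invocation index shifted right by 5 (thread_id >> 5), so each mask word covers one 32-invocation slice of the subgroup.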
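
In lower_fp64_to_fp32.cpp, PackDouble2x32 is rewritten into plain 32-bit integer manipulation: the float's 23-bit mantissa is assembled from the top 20 mantissa bits of the double's high word plus 3 bits from the low word, and the exponent is rebiased from 1023 to 127, with the all-zero and all-one exponent fields special-cased. A minimal host-side C++ sketch of the same field math, an illustration rather than part of the commit; like the pass, it truncates the mantissa instead of rounding, collapses subnormals to zero, and does not clamp a rebias that leaves the 8-bit range:

    #include <cstdint>
    #include <cstring>

    // lo/hi are the two 32-bit halves of a double's bit pattern,
    // as produced by PackDouble2x32.
    float PackedF64ToF32(uint32_t lo, uint32_t hi) {
        const uint32_t sign = hi >> 31;
        const uint32_t exp = (hi >> 20) & 0x7ff;
        const uint32_t mantissa = ((hi & 0xfffff) << 3) | (lo >> 29);
        uint32_t out_exp = exp + 127 - 1023; // rebias; unclamped, as in the pass
        if (exp == 0) {
            out_exp = 0; // zeros and subnormals collapse to zero
        } else if (exp == 0x7ff) {
            out_exp = 0xff; // infinities and NaNs stay special
        }
        const uint32_t bits = (sign << 31) | (out_exp << 23) | mantissa;
        float result;
        std::memcpy(&result, &bits, sizeof(result)); // bit cast
        return result;
    }

For example, the halves of 1.0 (lo = 0x00000000, hi = 0x3ff00000) give bits 0x3f800000, which is 1.0f; F32ToPackedF64 performs the inverse repacking for UnpackDouble2x32.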
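
The new SignedScaled and UnsignedScaled attribute types describe scaled vertex formats, integer components that the shader consumes as floating point (Vulkan's *_SSCALED and *_USCALED formats, for instance). When support_scaled_attributes is set, the host converts during vertex fetch and the input is declared as plain F32; otherwise the input is declared as an integer vector and each component is converted on load through the new SToF/UToF load ops. A rough scalar equivalent of that fallback conversion, a hypothetical helper rather than anything in the commit:

    #include <cstdint>

    // Fallback load for one scaled attribute component: reinterpret the
    // raw bits as an integer, then convert to float, matching the
    // OpConvertSToF / OpConvertUToF instructions the backend emits.
    float LoadScaledComponent(uint32_t raw, bool is_signed) {
        return is_signed ? static_cast<float>(static_cast<int32_t>(raw))
                         : static_cast<float>(raw);
    }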