Diffstat (limited to 'src/shader_recompiler')
13 files changed, 304 insertions(+), 23 deletions(-)
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 07e75f9d8..83b763447 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -245,8 +245,6 @@ target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
 
 if (MSVC)
     target_compile_options(shader_recompiler PRIVATE
-        /W4
-        /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
         /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
         /we4800 # Implicit conversion from 'type' to bool. Possible information loss
 
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index 85ee27333..d0e308124 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& coord, const IR::Value& derivatives,
                        const IR::Value& offset, const IR::Value& lod_clamp) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    ScopedRegister dpdx, dpdy;
+    ScopedRegister dpdx, dpdy, coords;
     const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
     if (multi_component) {
         // Allocate this early to avoid aliasing other registers
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
+        if (info.num_derivates >= 3) {
+            coords = ScopedRegister{ctx.reg_alloc};
+        }
     }
     const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
@@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                 "MOV.F {}.y,{}.w;",
                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
                 dpdy.reg, derivatives_vec);
+        Register final_coord;
+        if (info.num_derivates >= 3) {
+            ctx.Add("MOV.F {}.z,{}.x;"
+                    "MOV.F {}.z,{}.y;",
+                    dpdx.reg, coord_vec, dpdy.reg, coord_vec);
+            ctx.Add("MOV.F {}.x,0;"
+                    "MOV.F {}.y,0;"
+                    "MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg);
+            final_coord = coords.reg;
+        } else {
+            final_coord = coord_vec;
+        }
         if (info.has_lod_clamp) {
             const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
             ctx.Add("MOV.F {}.w,{};"
                     "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
-                    dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+                    dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg,
                     texture, type, offset_vec);
         } else {
-            ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
-                    texture, type, offset_vec);
+            ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg,
+                    dpdy.reg, texture, type, offset_vec);
         }
     } else {
         ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
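Note: the GLASM path above declares its ScopedRegister slots empty and assigns them later, so the scratch registers stay live across the emission calls. As background, here is a minimal, self-contained sketch of that RAII pattern; RegAlloc, the 8-register budget, and every name below are invented for illustration and are not yuzu's actual implementation.

    #include <bitset>
    #include <cstddef>
    #include <stdexcept>
    #include <utility>

    // Hypothetical allocator; the real one tracks GLASM registers.
    class RegAlloc {
    public:
        std::size_t Allocate() {
            for (std::size_t i = 0; i < used.size(); ++i) {
                if (!used.test(i)) {
                    used.set(i);
                    return i;
                }
            }
            throw std::runtime_error("out of scratch registers");
        }
        void Free(std::size_t index) {
            used.reset(index);
        }

    private:
        std::bitset<8> used;
    };

    // Scope-bound register: freed automatically on scope exit, movable so it
    // can be declared empty first and assigned later ("allocate this early").
    class ScopedRegister {
    public:
        ScopedRegister() = default;
        explicit ScopedRegister(RegAlloc& alloc_) : alloc{&alloc_}, index{alloc_.Allocate()} {}
        ScopedRegister(const ScopedRegister&) = delete;
        ScopedRegister& operator=(const ScopedRegister&) = delete;
        ScopedRegister(ScopedRegister&& other) noexcept
            : alloc{std::exchange(other.alloc, nullptr)}, index{other.index} {}
        ScopedRegister& operator=(ScopedRegister&& other) noexcept {
            if (alloc) {
                alloc->Free(index);
            }
            alloc = std::exchange(other.alloc, nullptr);
            index = other.index;
            return *this;
        }
        ~ScopedRegister() {
            if (alloc) {
                alloc->Free(index);
            }
        }

        RegAlloc* alloc{};
        std::size_t index{};
    };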
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 418505475..d9872ecc2 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (sparse_inst) {
         throw NotImplementedException("EmitImageGradient Sparse");
     }
-    if (!offset.IsEmpty()) {
+    if (!offset.IsEmpty() && info.num_derivates <= 2) {
         throw NotImplementedException("EmitImageGradient offset");
     }
     const auto texture{Texture(ctx, info, index)};
@@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
     if (multi_component) {
+        if (info.num_derivates >= 3) {
+            const auto offset_vec{ctx.var_alloc.Consume(offset)};
+            ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
+                    coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
+            return;
+        }
         ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
                 derivatives_vec, derivatives_vec);
     } else {
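The three-derivative GLSL path above reuses the offset operand as a second derivative vector. To see the call shape it produces, the format string from the hunk can be exercised standalone; only the format string is taken from the source, the variable names are made up.

    #include <fmt/format.h>

    int main() {
        const char* texel = "texel";
        const char* texture = "tex";
        const char* coords = "coords";
        const char* derivatives_vec = "d1"; // vec4: (dPdx.x, dPdy.x, dPdx.y, dPdy.y)
        const char* offset_vec = "d2";      // vec2 carried in the offset slot: (dPdx.z, dPdy.z)
        fmt::print("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));\n", texel, texture,
                   coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
        // Prints: texel=textureGrad(tex,coords,vec3(d1.xz, d2.x),vec3(d1.yw, d2.y));
    }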
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 7d901c04b..8decdf399 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -91,6 +91,34 @@ public:
         }
     }
 
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
+                           Id offset, Id lod_clamp) {
+        if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
+            throw LogicError("Derivates must be present");
+        }
+        boost::container::static_vector<Id, 3> deriv_1_accum{
+            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
+            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
+        };
+        boost::container::static_vector<Id, 3> deriv_2_accum{
+            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
+            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
+        };
+        const Id derivates_id1{ctx.OpCompositeConstruct(
+            ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
+        const Id derivates_id2{ctx.OpCompositeConstruct(
+            ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
+        Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
+        if (Sirit::ValidId(offset)) {
+            Add(spv::ImageOperandsMask::Offset, offset);
+        }
+        if (has_lod_clamp) {
+            Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+        }
+    }
+
     std::span<const Id> Span() const noexcept {
         return std::span{operands.data(), operands.size()};
     }
@@ -176,9 +204,7 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind
         if (def.count > 1) {
             throw NotImplementedException("Indirect texture sample");
         }
-        const Id sampler_id{def.id};
-        const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
-        return ctx.OpImage(ctx.image_buffer_type, id);
+        return ctx.OpLoad(ctx.image_buffer_type, def.id);
     } else {
         const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
         if (def.count > 1) {
@@ -524,8 +550,11 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                      Id derivates, Id offset, Id lod_clamp) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
-    const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates,
-                                 offset, lod_clamp);
+    const auto operands =
+        info.num_derivates == 3
+            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
+            : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
+                            lod_clamp);
     return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
                 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index bec5db173..72f69b7aa 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -74,6 +74,11 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
     throw InvalidArgument("Invalid image format {}", format);
 }
 
+spv::ImageFormat GetImageFormatForBuffer(ImageFormat format) {
+    const auto spv_format = GetImageFormat(format);
+    return spv_format == spv::ImageFormat::Unknown ? spv::ImageFormat::R32ui : spv_format;
+}
+
 Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
     const spv::ImageFormat format{GetImageFormat(desc.format)};
     const Id type{ctx.U32[1]};
@@ -1242,9 +1247,8 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
     }
     const spv::ImageFormat format{spv::ImageFormat::Unknown};
     image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
-    sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
 
-    const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)};
+    const Id type{TypePointer(spv::StorageClass::UniformConstant, image_buffer_type)};
     texture_buffers.reserve(info.texture_buffer_descriptors.size());
     for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
         if (desc.count != 1) {
@@ -1271,7 +1275,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
         if (desc.count != 1) {
             throw NotImplementedException("Array of image buffers");
         }
-        const spv::ImageFormat format{GetImageFormat(desc.format)};
+        const spv::ImageFormat format{GetImageFormatForBuffer(desc.format)};
         const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index e63330f11..7c49fd504 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -206,7 +206,6 @@ public:
     Id output_u32{};
 
     Id image_buffer_type{};
-    Id sampled_texture_buffer_type{};
     Id image_u32{};
 
     std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 69035d462..1e9e8c8f5 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -42,6 +42,7 @@ union TextureInstInfo {
     BitField<23, 2, u32> gather_component;
     BitField<25, 2, u32> num_derivates;
    BitField<27, 3, ImageFormat> image_format;
+    BitField<30, 1, u32> ndv_is_active;
 };
 static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
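ndv_is_active lands in bit 30 of the packed 32-bit TextureInstInfo union, next to num_derivates in bits 25-26, which is wide enough for the value 3 the optimizer assigns. A round-trip check of that layout with plain shifts, standing in for what BitField does (the Pack helper is ours, not the project's):

    #include <cstdint>

    constexpr std::uint32_t Pack(std::uint32_t num_derivates, std::uint32_t ndv_is_active) {
        return (num_derivates & 0x3u) << 25 | (ndv_is_active & 0x1u) << 30;
    }

    int main() {
        constexpr std::uint32_t raw = Pack(3, 1);
        static_assert(((raw >> 25) & 0x3u) == 3, "num_derivates must survive packing");
        static_assert(((raw >> 30) & 0x1u) == 1, "ndv_is_active must survive packing");
    }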
index ef4ffa54b..f00e20023 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
     } const fswzadd{insn};
 
     if (fswzadd.ndv != 0) {
-        throw NotImplementedException("FSWZADD NDV");
+        LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode");
     }
 
     const IR::F32 src_a{GetFloatReg8(insn)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
index 82aec3b73..1ddfeab06 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa
         BitField<12, 4, u64> mov32i_mask;
     } const mov{insn};
 
-    if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
-        throw NotImplementedException("Non-full move mask");
+    const u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask;
+    if (mask != 0xf && mask != 0x1) {
+        LOG_WARNING(Shader, "(STUBBED) Masked Mov");
+        return;
     }
     v.X(mov.dest_reg, src);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index 753c62098..e593132e6 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -161,7 +161,8 @@ enum class SpecialRegister : u64 {
         LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
         return ir.Imm32(0); // This is the default value hardware returns.
     default:
-        throw NotImplementedException("S2R special register {}", special_register);
+        LOG_CRITICAL(Shader, "(STUBBED) Special register {}", special_register);
+        return ir.Imm32(0); // This is the default value hardware returns.
     }
 }
 } // Anonymous namespace
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 2f930f1ea..6203003b3 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) {
 }
 
 void TranslatorVisitor::RAM(u64) {
-    ThrowNotImplemented(Opcode::RAM);
+    LOG_WARNING(Shader, "(STUBBED) RAM Instruction");
 }
 
 void TranslatorVisitor::RET(u64) {
@@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) {
 }
 
 void TranslatorVisitor::SAM(u64) {
-    ThrowNotImplemented(Opcode::SAM);
+    LOG_WARNING(Shader, "(STUBBED) SAM Instruction");
 }
 
 void TranslatorVisitor::SETCRSPTR(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index 2459fc30d..7a9b7fff8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
     info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
     info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
     info.has_lod_clamp.Assign(lc ? 1 : 0);
+    info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0);
 
     const IR::Value sample{[&]() -> IR::Value {
         if (tex.dc == 0) {
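The tex.ndv bit recorded here marks TEX instructions whose coordinates were produced by cross-lane quad math; the constant propagation pass below keys on that flag to recover explicit gradients. The idea it relies on, assuming the usual 2x2 quad layout (lane 0 top-left, lane 1 top-right, lane 2 bottom-left, lane 3 bottom-right), is that screen-space derivatives are plain neighbour differences. A toy illustration:

    #include <array>
    #include <cstdio>

    int main() {
        // One value of some shader quantity per quad lane.
        const std::array<float, 4> lanes{1.0f, 3.0f, 2.0f, 4.0f};
        const float ddx = lanes[1] - lanes[0]; // horizontal neighbour difference
        const float ddy = lanes[2] - lanes[0]; // vertical neighbour difference
        std::printf("ddx=%g ddy=%g\n", ddx, ddy); // ddx=2 ddy=1
    }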
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d81e9336..f46e55122 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -10,6 +10,7 @@
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
@@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
     }
 }
 
+void FoldFPAdd32(IR::Inst& inst) {
+    if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
+        return;
+    }
+    const IR::Value lhs_value{inst.Arg(0)};
+    const IR::Value rhs_value{inst.Arg(1)};
+    const auto check_neutral = [](const IR::Value& one_operand) {
+        return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
+    };
+    if (check_neutral(lhs_value)) {
+        inst.ReplaceUsesWith(rhs_value);
+    }
+    if (check_neutral(rhs_value)) {
+        inst.ReplaceUsesWith(lhs_value);
+    }
+}
+
+bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+    const IR::Value lhs_value{inst.Arg(0)};
+    const IR::Value rhs_value{inst.Arg(1)};
+    IR::Inst* const lhs_op{lhs_value.InstRecursive()};
+    IR::Inst* const rhs_op{rhs_value.InstRecursive()};
+    if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
+        if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
+            return false;
+        }
+        inst.ReplaceUsesWith(rhs_value);
+        return true;
+    }
+    if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
+        return false;
+    }
+    if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
+        return false;
+    }
+    inst.ReplaceUsesWith(lhs_value);
+    return true;
+}
+
 void FoldFPMul32(IR::Inst& inst) {
+    if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
+        return;
+    }
     const auto control{inst.Flags<IR::FpControl>()};
     if (control.no_contraction) {
         return;
@@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
+    if (FoldDerivateYFromCorrection(inst)) {
+        return;
+    }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
     IR::Inst* const rhs_op{rhs_value.InstRecursive()};
     if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
@@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
     const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
     if (value_2 != value_3) {
-        return;
+        if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
+            return;
+        }
+        if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
+            return;
+        }
     }
     const IR::Value index{inst2->Arg(1)};
     const IR::Value clamp{inst2->Arg(2)};
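The FindGradient3DDerivates hunk below unpacks the FSWZADD swizzle immediate two bits per lane and records which lanes use op 3 in a zero mask; a mask of 0b1010 (or its inverse) is then taken as the ddx signature. The decode itself can be run standalone; the immediate below is made up for the example:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t packed = 0b11'00'11'00; // lane ops 0,3,0,3 (lane 0 in the low bits)
        std::array<std::uint32_t, 4> ops{};
        std::uint32_t zero_mask = 0;
        for (unsigned i = 0; i < 4; ++i) {
            ops[i] = (packed >> (i * 2)) & 0x3u;          // same decode as resolve_mask
            zero_mask |= (ops[i] == 3 ? 1u : 0u) << i;    // lanes whose contribution is zeroed
        }
        std::printf("zero_mask=0b%u%u%u%u\n", zero_mask >> 3 & 1, zero_mask >> 2 & 1,
                    zero_mask >> 1 & 1, zero_mask & 1);   // 0b1010: the ddx_pattern
    }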
@@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
+bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+    if (coord.IsImmediate()) {
+        return false;
+    }
+    const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
+        const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
+        IR::Inst* const inst2{value_1.InstRecursive()};
+        if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
+            return false;
+        }
+        const IR::Value index{inst2->Arg(1).Resolve()};
+        const IR::Value clamp{inst2->Arg(2).Resolve()};
+        const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
+        if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
+            return false;
+        }
+        if (index.U32() != 3 && clamp.U32() != 3) {
+            return false;
+        }
+        result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
+        return true;
+    };
+    IR::Inst* const inst = coord.InstRecursive();
+    if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
+        return false;
+    }
+    std::array<IR::Value, 3> temporary_values;
+    IR::Value value_1 = inst->Arg(0).Resolve();
+    IR::Value value_2 = inst->Arg(1).Resolve();
+    IR::Value value_3 = inst->Arg(2).Resolve();
+    std::array<u32, 4> swizzles_mask_a{};
+    std::array<u32, 4> swizzles_mask_b{};
+    const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
+        const u32 value = mask.U32();
+        for (size_t i = 0; i < 4; i++) {
+            mask_results[i] = (value >> (i * 2)) & 0x3;
+        }
+    };
+    resolve_mask(swizzles_mask_a, value_3);
+    size_t coordinate_index = 0;
+    const auto resolve_pending = [&](IR::Value resolve_v) {
+        IR::Inst* const inst_r = resolve_v.InstRecursive();
+        if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
+            return false;
+        }
+        if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
+            return false;
+        }
+        if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
+            return false;
+        }
+        resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
+        return true;
+    };
+    if (value_1.IsImmediate() || value_2.IsImmediate()) {
+        return false;
+    }
+    bool should_continue = false;
+    if (resolve_pending(value_1)) {
+        should_continue = check_through_shuffle(value_2, temporary_values[0]);
+        coordinate_index = 0;
+    }
+    if (resolve_pending(value_2)) {
+        should_continue = check_through_shuffle(value_1, temporary_values[0]);
+        coordinate_index = 2;
+    }
+    if (!should_continue) {
+        return false;
+    }
+    // Figure out which operand holds the ddx terms and which the ddy terms.
+    size_t zero_mask_a = 0;
+    size_t zero_mask_b = 0;
+    for (size_t i = 0; i < 4; i++) {
+        if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
+            // The last operand can be inverted; we cannot determine a result.
+            return false;
+        }
+        zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
+        zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
+    }
+    static constexpr size_t ddx_pattern = 0b1010;
+    static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
+    if (std::popcount(zero_mask_a) != 2) {
+        return false;
+    }
+    if (std::popcount(zero_mask_b) != 2) {
+        return false;
+    }
+    if (zero_mask_a == zero_mask_b) {
+        return false;
+    }
+    results[0] = temporary_values[coordinate_index];
+
+    if (coordinate_index == 0) {
+        if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
+            results[1] = temporary_values[1];
+            results[2] = temporary_values[2];
+            return true;
+        }
+        results[2] = temporary_values[1];
+        results[1] = temporary_values[2];
+    } else {
+        const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
+            if (mask == ddx_pattern || mask == ddx_pattern_inv) {
+                results[1] = temporary_value;
+                return;
+            }
+            results[2] = temporary_value;
+        };
+        assign_result(temporary_values[1], zero_mask_b);
+        assign_result(temporary_values[0], zero_mask_a);
+    }
+
+    return true;
+}
+
+void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
+    IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
+    const auto orig_opcode = inst.GetOpcode();
+    if (info.ndv_is_active == 0) {
+        return;
+    }
+    if (info.type != TextureType::Color3D) {
+        return;
+    }
+    const IR::Value handle{inst.Arg(0)};
+    const IR::Value coords{inst.Arg(1)};
+    const IR::Value bias_lc{inst.Arg(2)};
+    const IR::Value offset{inst.Arg(3)};
+    if (!offset.IsImmediate()) {
+        return;
+    }
+    IR::Inst* const inst2 = coords.InstRecursive();
+    std::array<std::array<IR::Value, 3>, 3> results_matrix;
+    for (size_t i = 0; i < 3; i++) {
+        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+            return;
+        }
+    }
+    IR::F32 lod_clamp{};
+    if (info.has_lod_clamp != 0) {
+        if (!bias_lc.IsImmediate()) {
+            lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
+        } else {
+            lod_clamp = IR::F32{bias_lc};
+        }
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    IR::Value new_coords =
+        ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
+    IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
+                                                    results_matrix[1][1], results_matrix[1][2]);
+    IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
+    info.num_derivates.Assign(3);
+    IR::Value new_gradient_instruction =
+        ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
+    IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
+    if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
+        new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
+    }
+    inst.ReplaceUsesWith(new_gradient_instruction);
+}
+
 void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
     const IR::Value bank{inst.Arg(0)};
     const IR::Value offset{inst.Arg(1)};
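FindGradient3DDerivates yields one {coordinate, ddx, ddy} triple per component, and FoldImageSampleImplicitLod packs them as derivatives_1 = (ddx.x, ddy.x, ddx.y, ddy.y) with (ddx.z, ddy.z) riding in the offset slot. A plain-value rehearsal of that packing, consistent with the vec3(d1.xz, d2.x) / vec3(d1.yw, d2.y) unpacking seen in the backends above; the numbers are dummies:

    #include <array>
    #include <cstdio>

    int main() {
        // results_matrix[i] = {coord_i, ddx_i, ddy_i} for i in {x, y, z}.
        const std::array<std::array<float, 3>, 3> m{{
            {0.25f, 0.01f, 0.02f},
            {0.50f, 0.03f, 0.04f},
            {0.75f, 0.05f, 0.06f},
        }};
        const std::array<float, 4> derivatives_1{m[0][1], m[0][2], m[1][1], m[1][2]};
        const std::array<float, 2> derivatives_2{m[2][1], m[2][2]};
        // Backends reassemble dPdx from lanes (0, 2) of derivatives_1 plus lane 0
        // of derivatives_2, and dPdy from lanes (1, 3) plus lane 1.
        std::printf("dPdx=(%g,%g,%g)\n", derivatives_1[0], derivatives_1[2], derivatives_2[0]);
        std::printf("dPdy=(%g,%g,%g)\n", derivatives_1[1], derivatives_1[3], derivatives_2[1]);
    }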
@@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::SelectF32:
     case IR::Opcode::SelectF64:
         return FoldSelect(inst);
+    case IR::Opcode::FPNeg32:
+        FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
+        return;
+    case IR::Opcode::FPAdd32:
+        FoldFPAdd32(inst);
+        return;
     case IR::Opcode::FPMul32:
         return FoldFPMul32(inst);
     case IR::Opcode::LogicalAnd:
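check_neutral in FoldFPAdd32 uses std::abs so both signed zeros count as neutral. The fold is value-preserving except for one IEEE corner case worth knowing about, which can be checked directly:

    #include <cmath>
    #include <cstdio>

    int main() {
        // Both zeros pass the neutral-element test:
        std::printf("%d %d\n", std::abs(+0.0f) == 0.0f, std::abs(-0.0f) == 0.0f); // 1 1
        // But (-0.0f) + (+0.0f) rounds to +0.0f, so folding x + 0.0f to x keeps
        // the sign of zero where the unfolded add would not:
        std::printf("%d %d\n", std::signbit(-0.0f + 0.0f), std::signbit(-0.0f)); // 0 1
    }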
@@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
             FoldDriverConstBuffer(env, block, inst, 1);
         }
         break;
+    case IR::Opcode::BindlessImageSampleImplicitLod:
+    case IR::Opcode::BoundImageSampleImplicitLod:
+    case IR::Opcode::ImageSampleImplicitLod:
+        FoldImageSampleImplicitLod(block, inst);
+        break;
     default:
         break;
     }
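The relaxed comparison in FoldFSwizzleAdd above treats an f32 immediate and a u32 immediate as equal when their bit patterns match. A memcpy-based stand-in for the Common::BitCast helper it calls (assuming the project's helper behaves like std::bit_cast), with 1.0f as the worked example:

    #include <cstdint>
    #include <cstring>

    // Stand-in only; under C++20, std::bit_cast from <bit> does the same job.
    template <typename To, typename From>
    To BitCast(const From& from) {
        static_assert(sizeof(To) == sizeof(From), "sizes must match");
        To to;
        std::memcpy(&to, &from, sizeof(To));
        return to;
    }

    int main() {
        const float f = 1.0f;
        const std::uint32_t u = 0x3f800000u; // bit pattern of 1.0f
        return BitCast<std::uint32_t>(f) == u ? 0 : 1; // exits 0: considered equal
    }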