diff options
| author | bunnei <bunneidev@gmail.com> | 2021-09-01 17:36:26 -0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-01 17:36:26 -0700 | 
| commit | b2572a56d3ab3c327f766d718bbdd16be6bc1635 (patch) | |
| tree | c02a5e36766a4f94338c015f7c560e6bca63656f | |
| parent | 25444041d0cb71ce861e2df8a468901b39dfe266 (diff) | |
| parent | 907dfbea71bbfd92290d1eff1d2f0f7a33b32dc1 (diff) | |

Merge pull request #6900 from ameerj/attr-reorder

structured_control_flow: Add DemoteCombinationPass

7 files changed, 140 insertions, 10 deletions
| diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -20,6 +20,7 @@  #include "shader_recompiler/frontend/maxwell/decode.h"  #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"  #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/host_translate_info.h"  #include "shader_recompiler/object_pool.h"  namespace Shader::Maxwell { @@ -652,7 +653,7 @@ class TranslatePass {  public:      TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,                    ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, -                  IR::AbstractSyntaxList& syntax_list_) +                  IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)          : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},            syntax_list{syntax_list_} {          Visit(root_stmt, nullptr, nullptr); @@ -660,6 +661,9 @@ public:          IR::Block& first_block{*syntax_list.front().data.block};          IR::IREmitter ir(first_block, first_block.begin());          ir.Prologue(); +        if (uses_demote_to_helper && host_info.needs_demote_reorder) { +            DemoteCombinationPass(); +        }      }  private: @@ -809,7 +813,14 @@ private:              }              case StatementType::Return: {                  ensure_block(); -                IR::IREmitter{*current_block}.Epilogue(); +                IR::Block* return_block{block_pool.Create(inst_pool)}; +                IR::IREmitter{*return_block}.Epilogue(); +                current_block->AddBranch(return_block); + +                auto& merge{syntax_list.emplace_back()}; +                
merge.type = IR::AbstractSyntaxNode::Type::Block; +                merge.data.block = return_block; +                  current_block = nullptr;                  syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;                  break; @@ -824,6 +835,7 @@ private:                  auto& merge{syntax_list.emplace_back()};                  merge.type = IR::AbstractSyntaxNode::Type::Block;                  merge.data.block = demote_block; +                uses_demote_to_helper = true;                  break;              }              case StatementType::Unreachable: { @@ -855,11 +867,117 @@ private:          return block_pool.Create(inst_pool);      } +    void DemoteCombinationPass() { +        using Type = IR::AbstractSyntaxNode::Type; +        std::vector<IR::Block*> demote_blocks; +        std::vector<IR::U1> demote_conds; +        u32 num_epilogues{}; +        u32 branch_depth{}; +        for (const IR::AbstractSyntaxNode& node : syntax_list) { +            if (node.type == Type::If) { +                ++branch_depth; +            } +            if (node.type == Type::EndIf) { +                --branch_depth; +            } +            if (node.type != Type::Block) { +                continue; +            } +            if (branch_depth > 1) { +                // Skip reordering nested demote branches. 
+                continue; +            } +            for (const IR::Inst& inst : node.data.block->Instructions()) { +                const IR::Opcode op{inst.GetOpcode()}; +                if (op == IR::Opcode::DemoteToHelperInvocation) { +                    demote_blocks.push_back(node.data.block); +                    break; +                } +                if (op == IR::Opcode::Epilogue) { +                    ++num_epilogues; +                } +            } +        } +        if (demote_blocks.size() == 0) { +            return; +        } +        if (num_epilogues > 1) { +            LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); +            return; +        } +        s64 last_iterator_offset{}; +        auto& asl{syntax_list}; +        for (const IR::Block* demote_block : demote_blocks) { +            const auto start_it{asl.begin() + last_iterator_offset}; +            auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { +                return asn.type == Type::If && asn.data.if_node.body == demote_block; +            })}; +            if (asl_it == asl.end()) { +                // Demote without a conditional branch. +                // No need to proceed since all fragment instances will be demoted regardless. 
+                return; +            } +            const IR::Block* const end_if = asl_it->data.if_node.merge; +            demote_conds.push_back(asl_it->data.if_node.cond); +            last_iterator_offset = std::distance(asl.begin(), asl_it); + +            asl_it = asl.erase(asl_it); +            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { +                return asn.type == Type::Block && asn.data.block == demote_block; +            }); + +            asl_it = asl.erase(asl_it); +            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { +                return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; +            }); +            asl_it = asl.erase(asl_it); +        } +        const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { +            if (asn.type != Type::Block) { +                return false; +            } +            for (const auto& inst : asn.data.block->Instructions()) { +                if (inst.GetOpcode() == IR::Opcode::Epilogue) { +                    return true; +                } +            } +            return false; +        }}; +        const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; +        const auto return_block_it{(reverse_it + 1).base()}; + +        IR::IREmitter ir{*(return_block_it - 1)->data.block}; +        IR::U1 cond(IR::Value(false)); +        for (const auto& demote_cond : demote_conds) { +            cond = ir.LogicalOr(cond, demote_cond); +        } +        cond.Inst()->DestructiveAddUsage(1); + +        IR::AbstractSyntaxNode demote_if_node{}; +        demote_if_node.type = Type::If; +        demote_if_node.data.if_node.cond = cond; +        demote_if_node.data.if_node.body = demote_blocks[0]; +        demote_if_node.data.if_node.merge = return_block_it->data.block; + +        IR::AbstractSyntaxNode demote_node{}; +        demote_node.type = Type::Block; +        
demote_node.data.block = demote_blocks[0]; + +        IR::AbstractSyntaxNode demote_endif_node{}; +        demote_endif_node.type = Type::EndIf; +        demote_endif_node.data.end_if.merge = return_block_it->data.block; + +        asl.insert(return_block_it, demote_endif_node); +        asl.insert(return_block_it, demote_node); +        asl.insert(return_block_it, demote_if_node); +    } +      ObjectPool<Statement>& stmt_pool;      ObjectPool<IR::Inst>& inst_pool;      ObjectPool<IR::Block>& block_pool;      Environment& env;      IR::AbstractSyntaxList& syntax_list; +    bool uses_demote_to_helper{};  // TODO: C++20 Remove this when all compilers support constexpr std::vector  #if __cpp_lib_constexpr_vector >= 201907 @@ -871,12 +989,13 @@ private:  } // Anonymous namespace  IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, -                                Environment& env, Flow::CFG& cfg) { +                                Environment& env, Flow::CFG& cfg, +                                const HostTranslateInfo& host_info) {      ObjectPool<Statement> stmt_pool{64};      GotoPass goto_pass{cfg, stmt_pool};      Statement& root{goto_pass.RootStatement()};      IR::AbstractSyntaxList syntax_list; -    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; +    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};      return syntax_list;  } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,10 +11,13 @@  #include "shader_recompiler/frontend/maxwell/control_flow.h"  #include "shader_recompiler/object_pool.h" -namespace Shader::Maxwell { +namespace Shader { +struct HostTranslateInfo; +namespace 
Maxwell {  [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,                                                ObjectPool<IR::Block>& block_pool, Environment& env, -                                              Flow::CFG& cfg); +                                              Flow::CFG& cfg, const HostTranslateInfo& host_info); -} // namespace Shader::Maxwell +} // namespace Maxwell +} // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {  IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,                               Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {      IR::Program program; -    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); +    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);      program.blocks = GenerateBlocks(program.syntax_list);      program.post_order_blocks = PostOrder(program.syntax_list.front());      program.stage = env.ShaderStage(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -11,8 +11,9 @@ namespace Shader {  /// Misc information about the host  struct HostTranslateInfo { -    bool support_float16{}; ///< True when the device supports 16-bit floats -    bool support_int64{};   ///< True when the device supports 64-bit integers +    bool support_float16{};      ///< True when the device supports 16-bit floats +    bool support_int64{};        ///< True when the device 
supports 64-bit integers +    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered  };  } // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee992aed4..de9e41659 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -156,6 +156,10 @@ public:          return shader_backend;      } +    bool IsAmd() const { +        return vendor_name == "ATI Technologies Inc."; +    } +  private:      static bool TestVariableAoffi();      static bool TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f4dda17e..b0e14182e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo        host_info{            .support_float16 = false,            .support_int64 = device.HasShaderInt64(), +          .needs_demote_reorder = device.IsAmd(),        } {      if (use_asynchronous_shaders) {          workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f316c4f92..31bfbcb06 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw      host_info = Shader::HostTranslateInfo{          .support_float16 = device.IsFloat16Supported(),          .support_int64 = device.IsShaderInt64Supported(), +        .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || +                                driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,      };  } | 
