From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 9 Jan 2021 03:30:07 -0300 Subject: shader: Initial recompiler work --- .../frontend/maxwell/control_flow.cpp | 531 ++++++++++ .../frontend/maxwell/control_flow.h | 137 +++ src/shader_recompiler/frontend/maxwell/decode.cpp | 149 +++ src/shader_recompiler/frontend/maxwell/decode.h | 14 + .../frontend/maxwell/instruction.h | 62 ++ src/shader_recompiler/frontend/maxwell/location.h | 106 ++ src/shader_recompiler/frontend/maxwell/maxwell.inc | 285 +++++ src/shader_recompiler/frontend/maxwell/opcode.cpp | 26 + src/shader_recompiler/frontend/maxwell/opcode.h | 30 + src/shader_recompiler/frontend/maxwell/program.cpp | 69 ++ src/shader_recompiler/frontend/maxwell/program.h | 39 + .../frontend/maxwell/termination_code.cpp | 79 ++ .../frontend/maxwell/termination_code.h | 16 + .../frontend/maxwell/translate/impl/exit.cpp | 15 + .../impl/floating_point_conversion_integer.cpp | 133 +++ .../impl/floating_point_multi_function.cpp | 71 ++ .../frontend/maxwell/translate/impl/impl.cpp | 79 ++ .../frontend/maxwell/translate/impl/impl.h | 316 ++++++ .../translate/impl/load_store_attribute.cpp | 92 ++ .../maxwell/translate/impl/load_store_memory.cpp | 90 ++ .../maxwell/translate/impl/not_implemented.cpp | 1105 ++++++++++++++++++++ .../maxwell/translate/impl/register_move.cpp | 45 + .../frontend/maxwell/translate/translate.cpp | 50 + .../frontend/maxwell/translate/translate.h | 16 + 24 files changed, 3555 insertions(+) create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.h create mode 100644 src/shader_recompiler/frontend/maxwell/decode.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/decode.h create mode 100644 src/shader_recompiler/frontend/maxwell/instruction.h create mode 100644 src/shader_recompiler/frontend/maxwell/location.h create mode 100644 
src/shader_recompiler/frontend/maxwell/maxwell.inc create mode 100644 src/shader_recompiler/frontend/maxwell/opcode.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/opcode.h create mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/translate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/translate.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 000000000..fc4dba826 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -0,0 +1,531 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell::Flow { + +static u32 BranchOffset(Location pc, Instruction inst) { + return pc.Offset() + inst.branch.Offset() + 8; +} + +static std::array Split(Block&& block, Location pc, BlockId new_id) { + if (pc <= block.begin || pc >= block.end) { + throw InvalidArgument("Invalid address to split={}", pc); + } + return { + Block{ + .begin{block.begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .id{block.id}, + .stack{block.stack}, + .cond{true}, + .branch_true{new_id}, + .branch_false{UNREACHABLE_BLOCK_ID}, + }, + Block{ + .begin{pc}, + .end{block.end}, + .end_class{block.end_class}, + .id{new_id}, + .stack{std::move(block.stack)}, + .cond{block.cond}, + .branch_true{block.branch_true}, + .branch_false{block.branch_false}, + }, + }; +} + +static Token OpcodeToken(Opcode opcode) { // Maps a push/pop opcode pair to the stack token they share + switch (opcode) { + case Opcode::PBK: + case Opcode::BRK: + return Token::PBK; + case Opcode::PCNT: + case Opcode::CONT: + return Token::PCNT; // CONT must pop the target pushed by PCNT, not the PBK (break) target + case Opcode::PEXIT: + case Opcode::EXIT: + return Token::PEXIT; + case Opcode::PLONGJMP: + case Opcode::LONGJMP: + return Token::PLONGJMP; + case Opcode::PRET: + case Opcode::RET: + case Opcode::CAL: + return Token::PRET; + case Opcode::SSY: + case Opcode::SYNC: + return Token::SSY; + default: + throw InvalidArgument("{}", opcode); + } +} + +static bool IsAbsoluteJump(Opcode opcode) { + switch (opcode) { + case Opcode::JCAL: + case Opcode::JMP: + case Opcode::JMX: + return true; + default: + return false; + } +} + +static bool HasFlowTest(Opcode opcode) { + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::EXIT: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::BRK: + case Opcode::CONT: + case 
Opcode::LONGJMP: + case Opcode::RET: + case Opcode::SYNC: + return true; + case Opcode::CAL: + case Opcode::JCAL: + return false; + default: + throw InvalidArgument("Invalid branch {}", opcode); + } +} + +static std::string Name(const Block& block) { + if (block.begin.IsVirtual()) { + return fmt::format("\"Virtual {}\"", block.id); + } else { + return fmt::format("\"{}\"", block.begin); + } +} + +void Stack::Push(Token token, Location target) { + entries.push_back({ + .token{token}, + .target{target}, + }); +} + +std::pair Stack::Pop(Token token) const { + const std::optional pc{Peek(token)}; + if (!pc) { + throw LogicError("Token could not be found"); + } + return {*pc, Remove(token)}; +} + +std::optional Stack::Peek(Token token) const { + const auto reverse_entries{entries | std::views::reverse}; + const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; + if (it == reverse_entries.end()) { + return std::nullopt; + } + return it->target; +} + +Stack Stack::Remove(Token token) const { + const auto reverse_entries{entries | std::views::reverse}; + const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; + const auto pos{std::distance(reverse_entries.begin(), it)}; + Stack result; + result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); + return result; +} + +bool Block::Contains(Location pc) const noexcept { + return pc >= begin && pc < end; +} + +Function::Function(Location start_address) + : entrypoint{start_address}, labels{Label{ + .address{start_address}, + .block_id{0}, + .stack{}, + }} {} + +CFG::CFG(Environment& env_, Location start_address) : env{env_} { + functions.emplace_back(start_address); + for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { + while (!functions[function_id].labels.empty()) { + Function& function{functions[function_id]}; + Label label{function.labels.back()}; + function.labels.pop_back(); + AnalyzeLabel(function_id, label); + } + } 
+} + +void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { + if (InspectVisitedBlocks(function_id, label)) { + // Label address has been visited + return; + } + // Try to find the next block + Function* function{&functions[function_id]}; + Location pc{label.address}; + const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, + [function](Location pc, u32 block_index) { + return pc < function->blocks_data[block_index].begin; + })}; + const auto next_index{std::distance(function->blocks.begin(), next)}; + const bool is_last{next == function->blocks.end()}; + Location next_pc; + BlockId next_id{UNREACHABLE_BLOCK_ID}; + if (!is_last) { + next_pc = function->blocks_data[*next].begin; + next_id = function->blocks_data[*next].id; + } + // Insert before the next block + Block block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .id{label.block_id}, + .stack{std::move(label.stack)}, + .cond{true}, + .branch_true{UNREACHABLE_BLOCK_ID}, + .branch_false{UNREACHABLE_BLOCK_ID}, + }; + // Analyze instructions until it reaches an already visited block or there's a branch + bool is_branch{false}; + while (is_last || pc < next_pc) { + is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; + if (is_branch) { + break; + } + ++pc; + } + if (!is_branch) { + // If the block finished without a branch, + // it means that the next instruction is already visited, jump to it + block.end = pc; + block.cond = true; + block.branch_true = next_id; + block.branch_false = UNREACHABLE_BLOCK_ID; + } + // Function's pointer might be invalid, resolve it again + function = &functions[function_id]; + const u32 new_block_index = static_cast(function->blocks_data.size()); + function->blocks.insert(function->blocks.begin() + next_index, new_block_index); + function->blocks_data.push_back(std::move(block)); +} + +bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { + const Location pc{label.address}; + Function& 
function{functions[function_id]}; + const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { + return function.blocks_data[block_index].Contains(pc); + })}; + if (it == function.blocks.end()) { + // Address has not been visited + return false; + } + Block& block{function.blocks_data[*it]}; + if (block.begin == pc) { + throw LogicError("Dangling branch"); + } + const u32 first_index{*it}; + const u32 second_index{static_cast(function.blocks_data.size())}; + const std::array new_indices{first_index, second_index}; + std::array split_blocks{Split(std::move(block), pc, label.block_id)}; + function.blocks_data[*it] = std::move(split_blocks[0]); + function.blocks_data.push_back(std::move(split_blocks[1])); + function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + return true; +} + +CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { + const Instruction inst{env.ReadInstruction(pc.Offset())}; + const Opcode opcode{Decode(inst.raw)}; + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::RET: + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::BRX: + case Opcode::JMX: + AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::RET: + block.end_class = EndClass::Return; + break; + default: + break; + } + block.end = pc; + return AnalysisState::Branch; + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::SYNC: { + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; + block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block.end = pc; 
+ return AnalysisState::Branch; + } + case Opcode::PBK: + case Opcode::PCNT: + case Opcode::PEXIT: + case Opcode::PLONGJMP: + case Opcode::SSY: + block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + return AnalysisState::Continue; + case Opcode::EXIT: + return AnalyzeEXIT(block, function_id, pc, inst); + case Opcode::PRET: + throw NotImplementedException("PRET flow analysis"); + case Opcode::CAL: + case Opcode::JCAL: { + const bool is_absolute{IsAbsoluteJump(opcode)}; + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function into the list if it doesn't exist + if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { + functions.push_back(cal_pc); + } + // Handle CAL like a regular instruction + break; + } + default: + break; + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{true} || pred == Predicate{false}) { + return AnalysisState::Continue; + } + const IR::Condition cond{static_cast(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + return AnalysisState::Branch; +} + +void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, + EndClass insn_end_class, IR::Condition cond) { + if (block.begin != pc) { + // If the block doesn't start in the conditional instruction + // mark it as a label to visit it later + block.end = pc; + block.cond = true; + block.branch_true = AddLabel(block, block.stack, pc, function_id); + block.branch_false = UNREACHABLE_BLOCK_ID; + return; + } + // Impersonate the visited block with a virtual block + // Jump from this virtual to the real conditional instruction and the next instruction + Function& function{functions[function_id]}; + const BlockId conditional_block_id{++function.current_block_id}; + function.blocks.push_back(static_cast(function.blocks_data.size())); + Block& 
virtual_block{function.blocks_data.emplace_back(Block{ + .begin{}, // Virtual block + .end{}, + .end_class{EndClass::Branch}, + .id{block.id}, // Impersonating + .stack{block.stack}, + .cond{cond}, + .branch_true{conditional_block_id}, + .branch_false{UNREACHABLE_BLOCK_ID}, + })}; + // Set the end properties of the conditional instruction and give it a new identity + Block& conditional_block{block}; + conditional_block.end = pc; + conditional_block.end_class = insn_end_class; + conditional_block.id = conditional_block_id; + // Add a label to the instruction after the conditional instruction + const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + // Branch to the next instruction from the virtual block + virtual_block.branch_false = endif_block_id; + // And branch to it from the conditional instruction if it is a branch + if (insn_end_class == EndClass::Branch) { + conditional_block.cond = true; + conditional_block.branch_true = endif_block_id; + conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + } +} + +bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode) { + if (inst.branch.is_cbuf) { + throw NotImplementedException("Branch with constant buffer offset"); + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false}) { + return false; + } + const bool has_flow_test{HasFlowTest(opcode)}; + const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + block.cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); + block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + } else { + block.cond = true; + } + return true; +} + +void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute) { + const Location bra_pc{is_absolute ? 
inst.branch.Absolute() : BranchOffset(pc, inst)}; + block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); +} + +void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { + throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); +} + +void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function to the function list if it doesn't exist + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + if (it == functions.end()) { + functions.emplace_back(cal_pc); + } +} + +CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, + Instruction inst) { + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { + // EXIT will never be taken + return AnalysisState::Continue; + } + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + if (block.stack.Peek(Token::PEXIT).has_value()) { + throw NotImplementedException("Conditional EXIT with PEXIT token"); + } + const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + return AnalysisState::Branch; + } + if (const std::optional exit_pc{block.stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; + block.cond = true; + block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block.branch_false = UNREACHABLE_BLOCK_ID; + return AnalysisState::Branch; + } + block.end = pc; + block.end_class = EndClass::Exit; + return AnalysisState::Branch; +} + +BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { + Function& function{functions[function_id]}; + if 
(block.begin == pc) { + return block.id; + } + const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; + if (target != function.blocks_data.end()) { + return target->id; + } + const BlockId block_id{++function.current_block_id}; + function.labels.push_back(Label{ + .address{pc}, + .block_id{block_id}, + .stack{std::move(stack)}, + }); + return block_id; +} + +std::string CFG::Dot() const { + int node_uid{0}; + + std::string dot{"digraph shader {\n"}; + for (const Function& function : functions) { + dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); + dot += fmt::format("\t\tnode [style=filled];\n"); + for (const u32 block_index : function.blocks) { + const Block& block{function.blocks_data[block_index]}; + const std::string name{Name(block)}; + const auto add_branch = [&](BlockId branch_id, bool add_label) { + const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; + dot += fmt::format("\t\t{}->", name); + if (it == function.blocks_data.end()) { + dot += fmt::format("\"Unknown label {}\"", branch_id); + } else { + dot += Name(*it); + }; + if (add_label && block.cond != true && block.cond != false) { + dot += fmt::format(" [label=\"{}\"]", block.cond); + } + dot += '\n'; + }; + dot += fmt::format("\t\t{};\n", name); + switch (block.end_class) { + case EndClass::Branch: + if (block.cond != false) { + add_branch(block.branch_true, true); + } + if (block.cond != true) { + add_branch(block.branch_false, false); + } + break; + case EndClass::Exit: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Return: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Unreachable: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot 
+= fmt::format( + "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); + ++node_uid; + break; + } + } + if (function.entrypoint == 8) { + dot += fmt::format("\t\tlabel = \"main\";\n"); + } else { + dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); + } + dot += "\t}\n"; + } + if (!functions.empty()) { + if (functions.front().blocks.empty()) { + dot += "Start;\n"; + } else { + dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + } + dot += fmt::format("\tStart [shape=diamond];\n"); + } + dot += "}\n"; + return dot; +} + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..b2ab0cdc3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -0,0 +1,137 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell::Flow { + +using BlockId = u32; +using FunctionId = size_t; + +constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast(-1)}; + +enum class EndClass { + Branch, + Exit, + Return, + Unreachable, +}; + +enum class Token { + SSY, + PBK, + PEXIT, + PRET, + PCNT, + PLONGJMP, +}; + +struct StackEntry { + auto operator<=>(const StackEntry&) const noexcept = default; + + Token token; + Location target; +}; + +class Stack { +public: + void Push(Token token, Location target); + [[nodiscard]] std::pair Pop(Token token) const; + [[nodiscard]] std::optional Peek(Token token) const; + [[nodiscard]] Stack Remove(Token token) const; + +private: + boost::container::small_vector entries; +}; + +struct Block { + [[nodiscard]] bool Contains(Location pc) const noexcept; + + Location begin; + Location end; + EndClass end_class; + BlockId id; + Stack stack; + IR::Condition cond; + BlockId branch_true; + BlockId branch_false; +}; + +struct Label { + Location address; + BlockId block_id; + Stack stack; +}; + +struct Function { + Function(Location start_address); + + Location entrypoint; + BlockId current_block_id{0}; + boost::container::small_vector labels; + boost::container::small_vector blocks; + boost::container::small_vector blocks_data; +}; + +class CFG { + enum class AnalysisState { + Branch, + Continue, + }; + +public: + explicit CFG(Environment& env, Location start_address); + + [[nodiscard]] std::string Dot() const; + + [[nodiscard]] std::span Functions() const noexcept { + return std::span(functions.data(), functions.size()); + } + +private: + void AnalyzeLabel(FunctionId function_id, Label& label); + + /// 
Inspect already visited blocks. + /// Return true when the block has already been visited + [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + + [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + + void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + IR::Condition cond); + + /// Return true when the branch instruction is confirmed to be a branch + [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, + Instruction inst, Opcode opcode); + + void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute); + void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + + /// Return the branch target block id + [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, + FunctionId function_id); + + Environment& env; + boost::container::small_vector functions; + FunctionId current_function_id{0}; +}; + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..ab1cc6c8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { +namespace { +struct MaskValue { + u64 mask; + u64 value; +}; + +constexpr MaskValue MaskValueFromEncoding(const char* encoding) { + u64 mask{}; + u64 value{}; + u64 bit{u64(1) << 63}; + while (*encoding) { + switch (*encoding) { + case '0': + mask |= bit; + break; + case '1': + mask |= bit; + value |= bit; + break; + case '-': + break; + case ' ': + break; + default: + throw LogicError("Invalid encoding character '{}'", *encoding); + } + ++encoding; + if (*encoding != ' ') { + bit >>= 1; + } + } + return MaskValue{.mask{mask}, .value{value}}; +} + +struct InstEncoding { + MaskValue mask_value; + Opcode opcode; +}; +constexpr std::array UNORDERED_ENCODINGS{ +#define INST(name, cute, encode) \ + InstEncoding{ \ + .mask_value{MaskValueFromEncoding(encode)}, \ + .opcode{Opcode::name}, \ + }, +#include "maxwell.inc" +#undef INST +}; + +constexpr auto SortedEncodings() { + std::array encodings{UNORDERED_ENCODINGS}; + std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { + return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); + }); + return encodings; +} +constexpr auto ENCODINGS{SortedEncodings()}; + +constexpr int WidestLeftBits() { + int bits{64}; + for (const InstEncoding& encoding : ENCODINGS) { + bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); + } + return 64 - bits; +} +constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; +constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; + +constexpr size_t ToFastLookupIndex(u64 value) { + return static_cast(value >> MASK_SHIFT); +} + +constexpr size_t FastLookupSize() { + size_t max_width{}; + for (const InstEncoding& encoding : ENCODINGS) { + max_width = std::max(max_width, 
ToFastLookupIndex(encoding.mask_value.mask)); + } + return max_width + 1; +} +constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()}; + +struct InstInfo { + [[nodiscard]] u64 Mask() const noexcept { + return static_cast(high_mask) << MASK_SHIFT; + } + + [[nodiscard]] u64 Value() const noexcept { + return static_cast(high_value) << MASK_SHIFT; + } + + u16 high_mask; + u16 high_value; + Opcode opcode; +}; + +constexpr auto MakeFastLookupTableIndex(size_t index) { + std::array encodings{}; + size_t element{}; + for (const auto& encoding : ENCODINGS) { + const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)}; + const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; + if ((index & mask) == value) { + encodings.at(element) = InstInfo{ + .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, + .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, + .opcode{encoding.opcode}, + }; + ++element; + } + } + return encodings; +} + +/*constexpr*/ auto MakeFastLookupTable() { + auto encodings{std::make_unique, FAST_LOOKUP_SIZE>>()}; + for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) { + (*encodings)[index] = MakeFastLookupTableIndex(index); + } + return encodings; +} +const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()}; +} // Anonymous namespace + +Opcode Decode(u64 insn) { + const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]}; + const auto it{std::ranges::find_if( + table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })}; + if (it == table.end()) { + throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn); + } + return it->opcode; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h new file mode 100644 index 000000000..2a3dd28e8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// 
Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { + +[[nodiscard]] Opcode Decode(u64 insn); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h new file mode 100644 index 000000000..57fd531f2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" + +namespace Shader::Maxwell { + +struct Predicate { + Predicate() = default; + Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {} + Predicate(bool value) : index{7}, negated{!value} {} + Predicate(u64 raw) : index{static_cast(raw & 7)}, negated{(raw & 8) != 0} {} + + unsigned index; + bool negated; +}; + +inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept { + return lhs.index == rhs.index && lhs.negated == rhs.negated; +} + +inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept { + return !(lhs == rhs); +} + +union Instruction { + Instruction(u64 raw_) : raw{raw_} {} + + u64 raw; + + union { + BitField<5, 1, u64> is_cbuf; + BitField<0, 5, IR::FlowTest> flow_test; + + [[nodiscard]] u32 Absolute() const noexcept { + return static_cast(absolute); + } + + [[nodiscard]] s32 Offset() const noexcept { + return static_cast(offset); + } + + private: + BitField<20, 24, s64> offset; + BitField<20, 32, u64> absolute; + } branch; + + [[nodiscard]] Predicate Pred() const noexcept { + return Predicate{pred}; + } + +private: + BitField<16, 4, u64> pred; +}; 
+static_assert(std::is_trivially_copyable_v); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h new file mode 100644 index 000000000..66b51a19e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -0,0 +1,106 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Maxwell { + +class Location { + static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits::max()}; + +public: + constexpr Location() = default; + + constexpr Location(u32 initial_offset) : offset{initial_offset} { + if (initial_offset % 8 != 0) { + throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset); + } + Align(); + } + + [[nodiscard]] constexpr u32 Offset() const noexcept { + return offset; + } + + [[nodiscard]] constexpr bool IsVirtual() const { + return offset == VIRTUAL_OFFSET; + } + + constexpr auto operator<=>(const Location&) const noexcept = default; + + constexpr Location operator++() noexcept { // Prefix: advance first, return the updated location + Step(); + return *this; + } + + constexpr Location operator++(int) noexcept { // Postfix: advance, return the location held before the step + const Location copy{*this}; + Step(); + return copy; + } + + constexpr Location operator--() noexcept { // Prefix: step back first, return the updated location + Back(); + return *this; + } + + constexpr Location operator--(int) noexcept { // Postfix: step back, return the location held before the step + const Location copy{*this}; + Back(); + return copy; + } + + constexpr Location operator+(int number) const { + Location new_pc{*this}; + while (number > 0) { + --number; + ++new_pc; + } + while (number < 0) { + ++number; + --new_pc; + } + return new_pc; + } + + constexpr Location operator-(int number) const { + return operator+(-number); + } + +private: + constexpr void Align() { + offset += offset % 32 == 0 ? 
8 : 0; + } + + constexpr void Step() { + offset += 8 + (offset % 32 == 24 ? 8 : 0); + } + + constexpr void Back() { + offset -= 8 + (offset % 32 == 8 ? 8 : 0); + } + + u32 offset{VIRTUAL_OFFSET}; +}; + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:04x}", location.Offset()); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc new file mode 100644 index 000000000..1515285bf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -0,0 +1,285 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +INST(AL2P, "AL2P", "1110 1111 1010 0---") +INST(ALD, "ALD", "1110 1111 1101 1---") +INST(AST, "AST", "1110 1111 1111 0---") +INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----") +INST(ATOM, "ATOM", "1110 1101 ---- ----") +INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----") +INST(ATOMS, "ATOMS", "1110 1100 ---- ----") +INST(B2R, "B2R", "1111 0000 1011 1---") +INST(BAR, "BAR", "1111 0000 1010 1---") +INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---") +INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---") +INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---") +INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---") +INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---") +INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---") +INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---") +INST(BPT, "BPT", "1110 0011 1010 ----") +INST(BRA, "BRA", "1110 0010 0100 ----") +INST(BRK, "BRK", "1110 0011 0100 ----") +INST(BRX, "BRX", "1110 0010 0101 ----") +INST(CAL, "CAL", "1110 0010 0110 ----") +INST(CCTL, "CCTL", "1110 1111 011- ----") +INST(CCTLL, "CCTLL", "1110 1111 100- ----") +INST(CONT, "CONT", "1110 0011 0101 ----") 
+INST(CS2R, "CS2R", "0101 0000 1100 1---")
+INST(CSET, "CSET", "0101 0000 1001 1---")
+INST(CSETP, "CSETP", "0101 0000 1010 0---")
+INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
+INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
+INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
+INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
+INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
+INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
+// The (cr) form uses the "0100 1011" prefix shared by the other (cr) rows (BFI, FCMP, ICMP,
+// PRMT); the previous "0010 1011 0111" pattern fell inside HMUL2_32I's "0010 101-" range.
+INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
+INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
+// (reg) rows start with "0101 1100" and (cbuf) rows with "0100 1100", as for DADD and DMUL.
+INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
+INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
+INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
+INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
+INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
+INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
+INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
+INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
+INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
+INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
+INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
+INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
+INST(EXIT, "EXIT", "1110 0011 0000 ----")
+INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
+INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
+INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
+INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
+INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
+INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
+INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
+INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
+INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
+INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
+INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
+INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
+INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
+INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
+INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
+INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
+INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
+INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
+INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
+INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
+INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
+INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
+INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
+INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
+INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
+INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
+INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
+INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
+INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
+INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
+INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
+INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
+INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
+INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
+INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
+INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
+INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
+INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
+INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
+INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
+INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
+INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
+INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
+INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
+INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
+INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
+INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
+INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
+INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
+INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
+INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
+INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
+INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
+INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
+INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
+INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 1100 1--- ----")
+INST(HSET2_imm, "HSET2 (imm)", "0111 1100 0--- ----")
+INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
+INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
+INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
+INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
+INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
+INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
+INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
+INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
+INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
+INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
+INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
+INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
+INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
+INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
+INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
+INST(IADD32I, "IADD32I", "0001 110- ---- ----")
+INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
+INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
+INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
+INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
+INST(IDE, "IDE", "1110 0011 1001 ----")
+INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
+INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
+INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
+INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
+INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
+INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
+INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
+INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
+INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
+INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
+INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
+INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---") +INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") +INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") +INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") +INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") +INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") +INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") +INST(IPA, "IPA", "1110 0000 ---- ----") +INST(ISBERD, "ISBERD", "1110 1111 1101 0---") +INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") +INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") +INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") +INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") +INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") +INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") +INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") +INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") +INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") +INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") +INST(JCAL, "JCAL", "1110 0010 0010 ----") +INST(JMP, "JMP", "1110 0010 0001 ----") +INST(JMX, "JMX", "1110 0010 0000 ----") +INST(KIL, "KIL", "1110 0011 0011 ----") +INST(LD, "LD", "100- ---- ---- ----") +INST(LDC, "LDC", "1110 1111 1001 0---") +INST(LDG, "LDG", "1110 1110 1101 0---") +INST(LDL, "LDL", "1110 1111 0100 0---") +INST(LDS, "LDS", "1110 1111 0100 1---") +INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") +INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") +INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") +INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") +INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") +INST(LEPC, "LEPC", "0101 0000 1101 0---") +INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") +INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") +INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") +INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") +INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") +INST(LOP3_cbuf, "LOP3 
(cbuf)", "0011 11-- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----") +INST(LOP32I, "LOP32I", "0000 01-- ---- ----") +INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") +INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") +INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") +INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") +INST(MOV32I, "MOV32I", "0000 0001 0000 ----") +INST(MUFU, "MUFU", "0101 0000 1000 0---") +INST(NOP, "NOP", "0101 0000 1011 0---") +INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") +INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") +INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") +INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") +INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") +INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") +INST(PBK, "PBK", "1110 0010 1010 ----") +INST(PCNT, "PCNT", "1110 0010 1011 ----") +INST(PEXIT, "PEXIT", "1110 0010 0011 ----") +INST(PIXLD, "PIXLD", "1110 1111 1110 1---") +INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") +INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") +INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") +INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") +INST(PRET, "PRET", "1110 0010 0111 ----") +INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") +INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") +INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") +INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") +INST(PSET, "PSET", "0101 0000 1000 1---") +INST(PSETP, "PSETP", "0101 0000 1001 0---") +INST(R2B, "R2B", "1111 0000 1100 0---") +INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---") +INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") +INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") +INST(RAM, "RAM", "1110 0011 1000 ----") +INST(RED, "RED", "1110 1011 1111 1---") +INST(RET, "RET", "1110 0011 0010 ----") +INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") +INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") +INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") +INST(RTT, "RTT", "1110 
0011 0110 ----") +INST(S2R, "S2R", "1111 0000 1100 1---") +INST(SAM, "SAM", "1110 0011 0111 ----") +INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") +INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") +INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") +INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") +INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") +INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") +INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") +INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") +INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") +INST(SHFL, "SHFL", "1110 1111 0001 0---") +INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") +INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") +INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") +INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") +INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") +INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") +INST(SSY, "SSY", "1110 0010 1001 ----") +INST(ST, "ST", "101- ---- ---- ----") +INST(STG, "STG", "1110 1110 1101 1---") +INST(STL, "STL", "1110 1111 0101 0---") +INST(STP, "STP", "1110 1110 1010 0---") +INST(STS, "STS", "1110 1111 0101 1---") +INST(SUATOM_cas, "SUATOM", "1110 1010 ---- ----") +INST(SULD, "SULD", "1110 1011 000- ----") +INST(SURED, "SURED", "1110 1011 010- ----") +INST(SUST, "SUST", "1110 1011 001- ----") +INST(SYNC, "SYNC", "1111 0000 1111 1---") +INST(TEX, "TEX", "1100 00-- --11 1---") +INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEXS, "TEXS", "1101 -00- ---- ----") +INST(TLD, "TLD", "1101 1100 --11 1---") +INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD4, "TLD4", "1100 10-- --11 1---") +INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") +INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") +INST(TLDS, "TLDS", "1101 -01- ---- ----") +INST(TMML, "TMML", "1101 1111 0101 1---") +INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") +INST(TXA, "TXA", "1101 1111 0100 0---") +INST(TXD, "TXD", "1101 1110 0011 10--") +INST(TXD_b, "TXD 
(b)", "1101 1110 0111 10--") +INST(TXQ, "TXQ", "1101 1111 0100 1---") +INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") +INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") +INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") +INST(VADD, "VADD", "0010 00-- ---- ----") +INST(VMAD, "VMAD", "0101 1111 ---- ----") +INST(VMNMX, "VMNMX", "0011 101- ---- ----") +INST(VOTE, "VOTE", "0101 0000 1101 1---") +INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") +INST(VSET, "VSET", "0100 000- ---- ----") +INST(VSETP, "VSETP", "0101 0000 1111 0---") +INST(VSHL, "VSHL", "0101 0111 ---- ----") +INST(VSHR, "VSHR", "0101 0110 ---- ----") +INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") +INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") +INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") +INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") + +// Removed due to its weird formatting making fast tables larger +// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcode.cpp new file mode 100644 index 000000000..8a7bdb611 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcode.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <array>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Display name of every opcode, in enumerator order.
+// `cute` is already a string literal in maxwell.inc, so it is used as-is; stringizing it with
+// `#` would embed the surrounding quote characters in every name.
+constexpr std::array NAME_TABLE{
+#define INST(name, cute, encode) cute,
+#include "maxwell.inc"
+#undef INST
+};
+} // Anonymous namespace
+
+// Returns the display name of `opcode`.
+// Throws InvalidArgument when the enumerator value lies outside the name table.
+const char* NameOf(Opcode opcode) {
+    const auto index{static_cast<size_t>(opcode)};
+    if (index >= NAME_TABLE.size()) {
+        throw InvalidArgument("Invalid opcode with raw value {}", index);
+    }
+    return NAME_TABLE[index];
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcode.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcode.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::Maxwell {
+
+// One enumerator per INST row of maxwell.inc, in file order.
+enum class Opcode {
+#define INST(name, cute, encode) name,
+#include "maxwell.inc"
+#undef INST
+};
+
+// Returns the display name of `opcode`.
+const char* NameOf(Opcode opcode);
+
+} // namespace Shader::Maxwell
+
+// Formats an Opcode through NameOf.
+template <>
+struct fmt::formatter<Shader::Maxwell::Opcode> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
+        return fmt::format_to(ctx.out(), "{}", NameOf(opcode));
+    }
+};
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
new file mode 100644
index 000000000..67a98ba57
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <map>
+#include <memory>
+
+#include "shader_recompiler/frontend/maxwell/program.h"
+#include "shader_recompiler/frontend/maxwell/termination_code.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
+// Runs the destructor of every block; the storage itself is not released here.
+Program::Function::~Function() {
+    // A lambda is used because standard library functions must not have their address taken.
+    std::ranges::for_each(blocks, [](IR::Block* block) { std::destroy_at(block); });
+}
+
+// Translates every function of the control flow graph into IR blocks, then emits the
+// terminator of each block once all blocks of the function exist.
+Program::Program(Environment& env, const Flow::CFG& cfg) {
+    // Indexed by Flow::Block::id; reused (and resized) for every function.
+    std::vector<IR::Block*> block_map;
+    functions.reserve(cfg.Functions().size());
+
+    for (const Flow::Function& cfg_function : cfg.Functions()) {
+        Function& function{functions.emplace_back()};
+
+        const size_t num_blocks{cfg_function.blocks.size()};
+        IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)};
+        function.blocks.reserve(num_blocks);
+
+        block_map.resize(cfg_function.blocks_data.size());
+
+        // Visit the instructions of all blocks
+        for (const Flow::BlockId block_id : cfg_function.blocks) {
+            const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
+
+            IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))};
+            ++block_memory;
+            function.blocks.push_back(block);
+            block_map[flow_block.id] = block;
+        }
+        // Now that all blocks are defined, emit the termination instructions
+        for (const Flow::BlockId block_id : cfg_function.blocks) {
+            const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
+            EmitTerminationCode(flow_block, block_map);
+        }
+    }
+}
+
+// Returns a textual dump of every function; blocks are numbered across the whole program.
+std::string DumpProgram(const Program& program) {
+    size_t index{0};
+    // NOTE(review): key type reconstructed from the variable name - confirm against the
+    // IR::DumpBlock signature.
+    std::map<const IR::Inst*, size_t> inst_to_index;
+    std::map<const IR::Block*, size_t> block_to_index;
+
+    for (const Program::Function& function : program.functions) {
+        for (const IR::Block* const block : function.blocks) {
+            block_to_index.emplace(block, index);
+            ++index;
+        }
+    }
+    std::string ret;
+    for (const Program::Function& function : program.functions) {
+        ret += fmt::format("Function\n");
+        for (const IR::Block* const block : function.blocks) {
+            ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+        }
+    }
+    return ret;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h
new file mode 100644
index 000000000..7814b2c01
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/program.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <boost/pool/pool_alloc.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace Shader::Maxwell {
+
+// IR program built from a control flow graph: a list of functions, each a list of IR blocks.
+class Program {
+    friend std::string DumpProgram(const Program& program);
+
+public:
+    explicit Program(Environment& env, const Flow::CFG& cfg);
+
+private:
+    struct Function {
+        Function() = default;
+        ~Function();
+
+        // The destructor destroys the pointed-to blocks, so a copy would destroy them twice.
+        // Moving is allowed: a moved-from vector is empty and destroys nothing.
+        Function(const Function&) = delete;
+        Function& operator=(const Function&) = delete;
+        Function(Function&&) noexcept = default;
+        Function& operator=(Function&&) = delete;
+
+        std::vector<IR::Block*> blocks;
+    };
+
+    // NOTE(review): the allocator's template arguments were lost in this copy of the patch;
+    // the user-allocator and mutex arguments below are a reconstruction - confirm upstream.
+    boost::pool_allocator<IR::Block, boost::default_user_allocator_new_delete,
+                          boost::details::pool::null_mutex>
+        block_alloc_pool;
+    std::vector<Function> functions;
+};
+
+// Returns a textual dump of every function of the program.
+[[nodiscard]] std::string DumpProgram(const Program& program);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
new file mode 100644
index 000000000..a4ea5c5e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <span>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/termination_code.h"
+
+namespace Shader::Maxwell {
+
+static void EmitExit(IR::IREmitter& ir) {
+    ir.Exit();
+}
+
+// Emits the boolean value of a flow test.
+// Throws NotImplementedException for any test other than T, F, NE and NaN.
+static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) {
+    switch (flow_test) {
+    case IR::FlowTest::T:
+        return ir.Imm1(true);
+    case IR::FlowTest::F:
+        return ir.Imm1(false);
+    case IR::FlowTest::NE:
+        // FIXME: Verify this
+        return ir.LogicalNot(ir.GetZFlag());
+    case IR::FlowTest::NaN:
+        // FIXME: Verify this
+        return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
+    default:
+        throw NotImplementedException("Flow test {}", flow_test);
+    }
+}
+
+// Emits the value of a branch condition: predicate AND flow test, omitting whichever side is
+// trivially true (non-negated PT, or flow test T).
+static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) {
+    const IR::FlowTest flow_test{cond.FlowTest()};
+    const auto [pred, pred_negated]{cond.Pred()};
+    if (pred == IR::Pred::PT && !pred_negated) {
+        return GetFlowTest(flow_test, ir);
+    }
+    if (flow_test == IR::FlowTest::T) {
+        return ir.GetPred(pred, pred_negated);
+    }
+    return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir));
+}
+
+// Emits an unconditional branch when the condition is constant, a conditional one otherwise.
+static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map,
+                       IR::IREmitter& ir) {
+    if (flow_block.cond == true) {
+        return ir.Branch(block_map[flow_block.branch_true]);
+    }
+    if (flow_block.cond == false) {
+        return ir.Branch(block_map[flow_block.branch_false]);
+    }
+    return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true],
+                                block_map[flow_block.branch_false]);
+}
+
+// Appends the terminator matching flow_block.end_class to the IR block that block_map holds
+// for flow_block.id.
+void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map) {
+    IR::Block* const block{block_map[flow_block.id]};
+    IR::IREmitter ir(*block);
+    switch (flow_block.end_class) {
+    case Flow::EndClass::Branch:
+        EmitBranch(flow_block, block_map, ir);
+        break;
+    case Flow::EndClass::Exit:
+        EmitExit(ir);
+        break;
+    case Flow::EndClass::Return:
+        ir.Return();
+        break;
+    case Flow::EndClass::Unreachable:
+        ir.Unreachable();
+        break;
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h
new file mode 100644
index 000000000..b0d667942
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/termination_code.h
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace Shader::Maxwell {
+
+// Appends the terminator of flow_block to its IR block; block_map is indexed by flow block id.
+void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
new file mode 100644
index 000000000..e98bbd0d1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
@@ -0,0 +1,15 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+// EXIT carries no operands that are decoded here; it only emits an IR exit.
+void TranslatorVisitor::EXIT(u64) {
+    ir.Exit();
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..c4288d9a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,133 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class DestFormat : u64 {
+    Invalid,
+    I16,
+    I32,
+    I64,
+};
+enum class SrcFormat : u64 {
+    Invalid,
+    F16,
+    F32,
+    F64,
+};
+enum class Rounding : u64 {
+    Round,
+    Floor,
+    Ceil,
+    Trunc,
+};
+
+// Fields shared by every F2I encoding.
+union F2I {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<8, 2, DestFormat> dest_format;
+    BitField<10, 2, SrcFormat> src_format;
+    BitField<12, 1, u64> is_signed;
+    // Two bits wide: Rounding has four enumerators, and a 1-bit field could only ever yield
+    // Round or Floor.
+    BitField<39, 2, Rounding> rounding;
+    // NOTE(review): `half` and `neg` both read bit 49, so one of the two offsets is likely
+    // wrong - confirm against the instruction encoding.
+    BitField<49, 1, u64> half;
+    BitField<44, 1, u64> ftz;
+    BitField<45, 1, u64> abs;
+    BitField<47, 1, u64> cc;
+    BitField<49, 1, u64> neg;
+};
+
+// Returns the width in bits of a destination format.
+// Throws NotImplementedException for DestFormat::Invalid or any other value.
+size_t BitSize(DestFormat dest_format) {
+    switch (dest_format) {
+    case DestFormat::I16:
+        return 16;
+    case DestFormat::I32:
+        return 32;
+    case DestFormat::I64:
+        return 64;
+    default:
+        throw NotImplementedException("Invalid destination format {}", dest_format);
+    }
+}
+
+// Shared tail of the F2I variants: applies abs/neg to op_a, rounds it, converts it to an
+// integer of the destination width, stores the result and optionally updates the flags.
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) {
+    // F2I is used to convert from a floating point value to an integer
+    const F2I f2i{insn};
+
+    const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)};
+    const IR::U16U32U64 rounded_value{[&] {
+        switch (f2i.rounding) {
+        case Rounding::Round:
+            return v.ir.FPRoundEven(float_value);
+        case Rounding::Floor:
+            return v.ir.FPFloor(float_value);
+        case Rounding::Ceil:
+            return v.ir.FPCeil(float_value);
+        case Rounding::Trunc:
+            return v.ir.FPTrunc(float_value);
+        default:
+            throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
+        }
+    }()};
+
+    // TODO: Handle out of bounds conversions.
+    // For example, when converting F32 65537.0 to U16 the expected value is 0xffff.
+
+    const bool is_signed{f2i.is_signed != 0};
+    const size_t bitsize{BitSize(f2i.dest_format)};
+    const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
+
+    v.X(f2i.dest_reg, result);
+
+    if (f2i.cc != 0) {
+        v.SetZFlag(v.ir.GetZeroFromOp(result));
+        if (is_signed) {
+            v.SetSFlag(v.ir.GetSignFromOp(result));
+        } else {
+            v.ResetSFlag();
+        }
+        v.ResetCFlag();
+
+        // TODO: Investigate if out of bound conversions sets the overflow flag
+        v.ResetOFlag();
+    }
+}
+} // Anonymous namespace
+
+// Register form: the source operand is read according to src_format before conversion.
+void TranslatorVisitor::F2I_reg(u64 insn) {
+    union {
+        F2I base;
+        BitField<20, 8, IR::Reg> src_reg;
+    } const f2i{insn};
+
+    const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 {
+        switch (f2i.base.src_format) {
+        case SrcFormat::F16:
+            return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half);
+        case SrcFormat::F32:
+            return X(f2i.src_reg);
+        case SrcFormat::F64:
+            return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
+        default:
+            throw NotImplementedException("Invalid F2I source format {}",
+                                          f2i.base.src_format.Value());
+        }
+    }()};
+
+    TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_cbuf(u64) {
+    throw NotImplementedException("{}", Opcode::F2I_cbuf);
+}
+
+void TranslatorVisitor::F2I_imm(u64) {
+    throw NotImplementedException("{}", Opcode::F2I_imm);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..e2ab0dab2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Operation {
+    Cos = 0,
+    Sin = 1,
+    Ex2 = 2,    // Base 2 exponent
+    Lg2 = 3,    // Base 2 logarithm
+    Rcp = 4,    // Reciprocal
+    Rsq = 5,    // Reciprocal square root
+    Rcp64H = 6, // 64-bit reciprocal
+    Rsq64H = 7, // 64-bit reciprocal square root
+    Sqrt = 8,
+};
+} // Anonymous namespace
+
+// Applies abs/neg to the source register, evaluates the selected special function,
+// optionally saturates the result and writes it to the destination register.
+// Throws NotImplementedException for Rcp64H, Rsq64H and unknown operations.
+void TranslatorVisitor::MUFU(u64 insn) {
+    // MUFU is used to implement a bunch of special functions. See Operation.
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<20, 4, Operation> operation;
+        BitField<46, 1, u64> abs;
+        BitField<48, 1, u64> neg;
+        BitField<50, 1, u64> sat;
+    } const mufu{insn};
+
+    const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+    IR::U32 value{[&]() -> IR::U32 {
+        switch (mufu.operation) {
+        case Operation::Cos:
+            return ir.FPCosNotReduced(op_a);
+        case Operation::Sin:
+            return ir.FPSinNotReduced(op_a);
+        case Operation::Ex2:
+            return ir.FPExp2NotReduced(op_a);
+        case Operation::Lg2:
+            return ir.FPLog2(op_a);
+        case Operation::Rcp:
+            return ir.FPRecip(op_a);
+        case Operation::Rsq:
+            return ir.FPRecipSqrt(op_a);
+        case Operation::Rcp64H:
+            throw NotImplementedException("MUFU.RCP64H");
+        case Operation::Rsq64H:
+            throw NotImplementedException("MUFU.RSQ64H");
+        case Operation::Sqrt:
+            return ir.FPSqrt(op_a);
+        default:
+            throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
+        }
+    }()};
+
+    if (mufu.sat) {
+        value = ir.FPSaturate(value);
+    }
+
+    X(mufu.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..7bc7ce9f2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+// Reads a general purpose register.
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+    return ir.GetReg(reg);
+}
+
+// Writes a general purpose register.
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+    ir.SetReg(dest_reg, value);
+}
+
+// Emits a constant buffer read for the binding (bits 34-38) and offset (bits 20-33) encoded
+// in the instruction; the encoded offset is multiplied by 4 to obtain the byte offset.
+// Throws NotImplementedException for bindings >= 18 and for negative offsets.
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 14, s64> offset;
+        BitField<34, 5, u64> binding;
+    } const cbuf{insn};
+    if (cbuf.binding >= 18) {
+        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+    }
+    // NOTE(review): a signed 14-bit field cannot reach 0x10'000, so only the `< 0` half of
+    // this check can trigger - confirm whether the field was meant to be unsigned.
+    if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) {
+        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+    }
+    const IR::U32 binding{ir.Imm32(static_cast<u32>(cbuf.binding))};
+    const IR::U32 byte_offset{ir.Imm32(static_cast<u32>(cbuf.offset) * 4)};
+    return ir.GetCbuf(binding, byte_offset);
+}
+
+// Emits the 32-bit immediate built from the 19-bit value at bits 20-38 and the flag at bit 56.
+IR::U32 TranslatorVisitor::GetImm(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 19, u64> value;
+        BitField<56, 1, u64> is_negative;
+    } const imm{insn};
+    const s32 positive_value{static_cast<s32>(imm.value)};
+    // NOTE(review): this treats the encoding as sign-magnitude; if bit 56 is really the sign
+    // bit of a two's complement value, the result should be sign-extended instead - confirm.
+    const s32 value{imm.is_negative != 0 ? -positive_value : positive_value};
+    return ir.Imm32(value);
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+    ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+    ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+    ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+    ir.SetOFlag(value);
+}
+
+// Clears the zero flag (named differently from the other Reset*Flag helpers).
+void TranslatorVisitor::ResetZero() {
+    SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+    SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+    SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+    SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..bc607b002
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,316 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" + +namespace Shader::Maxwell { + +class TranslatorVisitor { +public: + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + + Environment& env; + IR::IREmitter ir; + + void AL2P(u64 insn); + void ALD(u64 insn); + void AST(u64 insn); + void ATOM_cas(u64 insn); + void ATOM(u64 insn); + void ATOMS_cas(u64 insn); + void ATOMS(u64 insn); + void B2R(u64 insn); + void BAR(u64 insn); + void BFE_reg(u64 insn); + void BFE_cbuf(u64 insn); + void BFE_imm(u64 insn); + void BFI_reg(u64 insn); + void BFI_rc(u64 insn); + void BFI_cr(u64 insn); + void BFI_imm(u64 insn); + void BPT(u64 insn); + void BRA(u64 insn); + void BRK(u64 insn); + void BRX(u64 insn); + void CAL(u64 insn); + void CCTL(u64 insn); + void CCTLL(u64 insn); + void CONT(u64 insn); + void CS2R(u64 insn); + void CSET(u64 insn); + void CSETP(u64 insn); + void DADD_reg(u64 insn); + void DADD_cbuf(u64 insn); + void DADD_imm(u64 insn); + void DEPBAR(u64 insn); + void DFMA_reg(u64 insn); + void DFMA_rc(u64 insn); + void DFMA_cr(u64 insn); + void DFMA_imm(u64 insn); + void DMNMX_reg(u64 insn); + void DMNMX_cbuf(u64 insn); + void DMNMX_imm(u64 insn); + void DMUL_reg(u64 insn); + void DMUL_cbuf(u64 insn); + void DMUL_imm(u64 insn); + void DSET_reg(u64 insn); + void DSET_cbuf(u64 insn); + void DSET_imm(u64 insn); + void DSETP_reg(u64 insn); + void DSETP_cbuf(u64 insn); + void DSETP_imm(u64 insn); + void EXIT(u64 insn); + void F2F_reg(u64 insn); + void F2F_cbuf(u64 insn); + void F2F_imm(u64 insn); + void F2I_reg(u64 insn); + void F2I_cbuf(u64 insn); + void F2I_imm(u64 insn); + void FADD_reg(u64 insn); + void FADD_cbuf(u64 insn); + void FADD_imm(u64 insn); + void FADD32I(u64 insn); + void FCHK_reg(u64 insn); + void FCHK_cbuf(u64 insn); + void FCHK_imm(u64 insn); + void 
FCMP_reg(u64 insn); + void FCMP_rc(u64 insn); + void FCMP_cr(u64 insn); + void FCMP_imm(u64 insn); + void FFMA_reg(u64 insn); + void FFMA_rc(u64 insn); + void FFMA_cr(u64 insn); + void FFMA_imm(u64 insn); + void FFMA32I(u64 insn); + void FLO_reg(u64 insn); + void FLO_cbuf(u64 insn); + void FLO_imm(u64 insn); + void FMNMX_reg(u64 insn); + void FMNMX_cbuf(u64 insn); + void FMNMX_imm(u64 insn); + void FMUL_reg(u64 insn); + void FMUL_cbuf(u64 insn); + void FMUL_imm(u64 insn); + void FMUL32I(u64 insn); + void FSET_reg(u64 insn); + void FSET_cbuf(u64 insn); + void FSET_imm(u64 insn); + void FSETP_reg(u64 insn); + void FSETP_cbuf(u64 insn); + void FSETP_imm(u64 insn); + void FSWZADD(u64 insn); + void GETCRSPTR(u64 insn); + void GETLMEMBASE(u64 insn); + void HADD2_reg(u64 insn); + void HADD2_cbuf(u64 insn); + void HADD2_imm(u64 insn); + void HADD2_32I(u64 insn); + void HFMA2_reg(u64 insn); + void HFMA2_rc(u64 insn); + void HFMA2_cr(u64 insn); + void HFMA2_imm(u64 insn); + void HFMA2_32I(u64 insn); + void HMUL2_reg(u64 insn); + void HMUL2_cbuf(u64 insn); + void HMUL2_imm(u64 insn); + void HMUL2_32I(u64 insn); + void HSET2_reg(u64 insn); + void HSET2_cbuf(u64 insn); + void HSET2_imm(u64 insn); + void HSETP2_reg(u64 insn); + void HSETP2_cbuf(u64 insn); + void HSETP2_imm(u64 insn); + void I2F_reg(u64 insn); + void I2F_cbuf(u64 insn); + void I2F_imm(u64 insn); + void I2I_reg(u64 insn); + void I2I_cbuf(u64 insn); + void I2I_imm(u64 insn); + void IADD_reg(u64 insn); + void IADD_cbuf(u64 insn); + void IADD_imm(u64 insn); + void IADD3_reg(u64 insn); + void IADD3_cbuf(u64 insn); + void IADD3_imm(u64 insn); + void IADD32I(u64 insn); + void ICMP_reg(u64 insn); + void ICMP_rc(u64 insn); + void ICMP_cr(u64 insn); + void ICMP_imm(u64 insn); + void IDE(u64 insn); + void IDP_reg(u64 insn); + void IDP_imm(u64 insn); + void IMAD_reg(u64 insn); + void IMAD_rc(u64 insn); + void IMAD_cr(u64 insn); + void IMAD_imm(u64 insn); + void IMAD32I(u64 insn); + void IMADSP_reg(u64 insn); + void 
IMADSP_rc(u64 insn); + void IMADSP_cr(u64 insn); + void IMADSP_imm(u64 insn); + void IMNMX_reg(u64 insn); + void IMNMX_cbuf(u64 insn); + void IMNMX_imm(u64 insn); + void IMUL_reg(u64 insn); + void IMUL_cbuf(u64 insn); + void IMUL_imm(u64 insn); + void IMUL32I(u64 insn); + void IPA(u64 insn); + void ISBERD(u64 insn); + void ISCADD_reg(u64 insn); + void ISCADD_cbuf(u64 insn); + void ISCADD_imm(u64 insn); + void ISCADD32I(u64 insn); + void ISET_reg(u64 insn); + void ISET_cbuf(u64 insn); + void ISET_imm(u64 insn); + void ISETP_reg(u64 insn); + void ISETP_cbuf(u64 insn); + void ISETP_imm(u64 insn); + void JCAL(u64 insn); + void JMP(u64 insn); + void JMX(u64 insn); + void KIL(u64 insn); + void LD(u64 insn); + void LDC(u64 insn); + void LDG(u64 insn); + void LDL(u64 insn); + void LDS(u64 insn); + void LEA_hi_reg(u64 insn); + void LEA_hi_cbuf(u64 insn); + void LEA_lo_reg(u64 insn); + void LEA_lo_cbuf(u64 insn); + void LEA_lo_imm(u64 insn); + void LEPC(u64 insn); + void LONGJMP(u64 insn); + void LOP_reg(u64 insn); + void LOP_cbuf(u64 insn); + void LOP_imm(u64 insn); + void LOP3_reg(u64 insn); + void LOP3_cbuf(u64 insn); + void LOP3_imm(u64 insn); + void LOP32I(u64 insn); + void MEMBAR(u64 insn); + void MOV_reg(u64 insn); + void MOV_cbuf(u64 insn); + void MOV_imm(u64 insn); + void MOV32I(u64 insn); + void MUFU(u64 insn); + void NOP(u64 insn); + void OUT_reg(u64 insn); + void OUT_cbuf(u64 insn); + void OUT_imm(u64 insn); + void P2R_reg(u64 insn); + void P2R_cbuf(u64 insn); + void P2R_imm(u64 insn); + void PBK(u64 insn); + void PCNT(u64 insn); + void PEXIT(u64 insn); + void PIXLD(u64 insn); + void PLONGJMP(u64 insn); + void POPC_reg(u64 insn); + void POPC_cbuf(u64 insn); + void POPC_imm(u64 insn); + void PRET(u64 insn); + void PRMT_reg(u64 insn); + void PRMT_rc(u64 insn); + void PRMT_cr(u64 insn); + void PRMT_imm(u64 insn); + void PSET(u64 insn); + void PSETP(u64 insn); + void R2B(u64 insn); + void R2P_reg(u64 insn); + void R2P_cbuf(u64 insn); + void R2P_imm(u64 insn); + void 
RAM(u64 insn); + void RED(u64 insn); + void RET(u64 insn); + void RRO_reg(u64 insn); + void RRO_cbuf(u64 insn); + void RRO_imm(u64 insn); + void RTT(u64 insn); + void S2R(u64 insn); + void SAM(u64 insn); + void SEL_reg(u64 insn); + void SEL_cbuf(u64 insn); + void SEL_imm(u64 insn); + void SETCRSPTR(u64 insn); + void SETLMEMBASE(u64 insn); + void SHF_l_reg(u64 insn); + void SHF_l_imm(u64 insn); + void SHF_r_reg(u64 insn); + void SHF_r_imm(u64 insn); + void SHFL(u64 insn); + void SHL_reg(u64 insn); + void SHL_cbuf(u64 insn); + void SHL_imm(u64 insn); + void SHR_reg(u64 insn); + void SHR_cbuf(u64 insn); + void SHR_imm(u64 insn); + void SSY(u64 insn); + void ST(u64 insn); + void STG(u64 insn); + void STL(u64 insn); + void STP(u64 insn); + void STS(u64 insn); + void SUATOM_cas(u64 insn); + void SULD(u64 insn); + void SURED(u64 insn); + void SUST(u64 insn); + void SYNC(u64 insn); + void TEX(u64 insn); + void TEX_b(u64 insn); + void TEXS(u64 insn); + void TLD(u64 insn); + void TLD_b(u64 insn); + void TLD4(u64 insn); + void TLD4_b(u64 insn); + void TLD4S(u64 insn); + void TLDS(u64 insn); + void TMML(u64 insn); + void TMML_b(u64 insn); + void TXA(u64 insn); + void TXD(u64 insn); + void TXD_b(u64 insn); + void TXQ(u64 insn); + void TXQ_b(u64 insn); + void VABSDIFF(u64 insn); + void VABSDIFF4(u64 insn); + void VADD(u64 insn); + void VMAD(u64 insn); + void VMNMX(u64 insn); + void VOTE(u64 insn); + void VOTE_vtg(u64 insn); + void VSET(u64 insn); + void VSETP(u64 insn); + void VSHL(u64 insn); + void VSHR(u64 insn); + void XMAD_reg(u64 insn); + void XMAD_rc(u64 insn); + void XMAD_cr(u64 insn); + void XMAD_imm(u64 insn); + + [[nodiscard]] IR::U32 X(IR::Reg reg); + void X(IR::Reg dest_reg, const IR::U32& value); + + [[nodiscard]] IR::U32 GetCbuf(u64 insn); + + [[nodiscard]] IR::U32 GetImm(u64 insn); + + void SetZFlag(const IR::U1& value); + void SetSFlag(const IR::U1& value); + void SetCFlag(const IR::U1& value); + void SetOFlag(const IR::U1& value); + + void ResetZero(); + void 
ResetSFlag(); + void ResetCFlag(); + void ResetOFlag(); +}; + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..23512db1a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class InterpolationMode : u64 { + Pass = 0, + Multiply = 1, + Constant = 2, + Sc = 3, +}; + +enum class SampleMode : u64 { + Default = 0, + Centroid = 1, + Offset = 2, +}; +} // Anonymous namespace + +void TranslatorVisitor::IPA(u64 insn) { + // IPA is the instruction used to read varyings from a fragment shader. + // gl_FragCoord is mapped to the gl_Position attribute. + // It yields unknown results when used outside of the fragment shader stage. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 8, IR::Reg> multiplier; + BitField<30, 8, IR::Attribute> attribute; + BitField<38, 1, u64> idx; + BitField<51, 1, u64> sat; + BitField<52, 2, SampleMode> sample_mode; + BitField<54, 2, InterpolationMode> interpolation_mode; + } const ipa{insn}; + + // Indexed IPAs are used for indexed varyings. 
+ // For example: + // + // in vec4 colors[4]; + // uniform int idx; + // void main() { + // gl_FragColor = colors[idx]; + // } + const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; + if (is_indexed) { + throw NotImplementedException("IPA.IDX"); + } + + const IR::Attribute attribute{ipa.attribute}; + IR::U32 value{ir.GetAttribute(attribute)}; + if (IR::IsGeneric(attribute)) { + // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; + const bool is_perspective{false}; + if (is_perspective) { + const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; + value = ir.FPMul(value, rcp_position_w); + } + } + + switch (ipa.interpolation_mode) { + case InterpolationMode::Pass: + break; + case InterpolationMode::Multiply: + value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); + break; + case InterpolationMode::Constant: + throw NotImplementedException("IPA.CONSTANT"); + case InterpolationMode::Sc: + throw NotImplementedException("IPA.SC"); + } + + // Saturated IPAs are generally generated out of clamped varyings. + // For example: clamp(some_varying, 0.0, 1.0) + const bool is_saturated{ipa.sat != 0}; + if (is_saturated) { + if (attribute == IR::Attribute::FrontFace) { + throw NotImplementedException("IPA.SAT on FrontFace"); + } + value = ir.FPSaturate(value); + } + + ir.SetReg(ipa.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..d8fd387cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -0,0 +1,90 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class StoreSize : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory) +}; +} // Anonymous namespace + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. + union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<45, 1, u64> e; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (stg.e == 0) { + // STG without .E uses a 32-bit pointer, zero-extend it + return ir.ConvertU(64, X(stg.addr_reg)); + } + if (!IR::IsAligned(stg.addr_reg, 2)) { + throw NotImplementedException("Unaligned address register"); + } + // Pack two registers to build the 32-bit address + return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); + }()}; + + switch (stg.size) { + case StoreSize::U8: + ir.WriteGlobalU8(address, X(stg.data_reg)); + break; + case StoreSize::S8: + ir.WriteGlobalS8(address, X(stg.data_reg)); + break; + case StoreSize::U16: + ir.WriteGlobalU16(address, X(stg.data_reg)); + break; + case StoreSize::S16: + ir.WriteGlobalS16(address, X(stg.data_reg)); + break; + case StoreSize::B32: + ir.WriteGlobal32(address, X(stg.data_reg)); + break; + case StoreSize::B64: { + if (!IR::IsAligned(stg.data_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value 
vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; + ir.WriteGlobal64(address, vector); + break; + } + case StoreSize::B128: + if (!IR::IsAligned(stg.data_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), + X(stg.data_reg + 2), X(stg.data_reg + 3))}; + ir.WriteGlobal128(address, vector); + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..c907c1ffb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -0,0 +1,1105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { + +[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { + auto raw{IR::DumpBlock(block)}; + + Optimization::GetSetElimination(block); + Optimization::DeadCodeEliminationPass(block); + Optimization::IdentityRemovalPass(block); + auto dumped{IR::DumpBlock(block)}; + + fmt::print(stderr, "{}", dumped); +} + +[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { + throw NotImplementedException("Instruction {} is not implemented", opcode); +} + +void TranslatorVisitor::AL2P(u64) { + ThrowNotImplemented(Opcode::AL2P); +} + +void TranslatorVisitor::ALD(u64) { + ThrowNotImplemented(Opcode::ALD); +} + +void TranslatorVisitor::AST(u64) { + ThrowNotImplemented(Opcode::AST); +} + +void TranslatorVisitor::ATOM_cas(u64) { + ThrowNotImplemented(Opcode::ATOM_cas); +} + +void 
TranslatorVisitor::ATOM(u64) { + ThrowNotImplemented(Opcode::ATOM); +} + +void TranslatorVisitor::ATOMS_cas(u64) { + ThrowNotImplemented(Opcode::ATOMS_cas); +} + +void TranslatorVisitor::ATOMS(u64) { + ThrowNotImplemented(Opcode::ATOMS); +} + +void TranslatorVisitor::B2R(u64) { + ThrowNotImplemented(Opcode::B2R); +} + +void TranslatorVisitor::BAR(u64) { + ThrowNotImplemented(Opcode::BAR); +} + +void TranslatorVisitor::BFE_reg(u64) { + ThrowNotImplemented(Opcode::BFE_reg); +} + +void TranslatorVisitor::BFE_cbuf(u64) { + ThrowNotImplemented(Opcode::BFE_cbuf); +} + +void TranslatorVisitor::BFE_imm(u64) { + ThrowNotImplemented(Opcode::BFE_imm); +} + +void TranslatorVisitor::BFI_reg(u64) { + ThrowNotImplemented(Opcode::BFI_reg); +} + +void TranslatorVisitor::BFI_rc(u64) { + ThrowNotImplemented(Opcode::BFI_rc); +} + +void TranslatorVisitor::BFI_cr(u64) { + ThrowNotImplemented(Opcode::BFI_cr); +} + +void TranslatorVisitor::BFI_imm(u64) { + ThrowNotImplemented(Opcode::BFI_imm); +} + +void TranslatorVisitor::BPT(u64) { + ThrowNotImplemented(Opcode::BPT); +} + +void TranslatorVisitor::BRA(u64) { + ThrowNotImplemented(Opcode::BRA); +} + +void TranslatorVisitor::BRK(u64) { + ThrowNotImplemented(Opcode::BRK); +} + +void TranslatorVisitor::BRX(u64) { + ThrowNotImplemented(Opcode::BRX); +} + +void TranslatorVisitor::CAL(u64) { + ThrowNotImplemented(Opcode::CAL); +} + +void TranslatorVisitor::CCTL(u64) { + ThrowNotImplemented(Opcode::CCTL); +} + +void TranslatorVisitor::CCTLL(u64) { + ThrowNotImplemented(Opcode::CCTLL); +} + +void TranslatorVisitor::CONT(u64) { + ThrowNotImplemented(Opcode::CONT); +} + +void TranslatorVisitor::CS2R(u64) { + ThrowNotImplemented(Opcode::CS2R); +} + +void TranslatorVisitor::CSET(u64) { + ThrowNotImplemented(Opcode::CSET); +} + +void TranslatorVisitor::CSETP(u64) { + ThrowNotImplemented(Opcode::CSETP); +} + +void TranslatorVisitor::DADD_reg(u64) { + ThrowNotImplemented(Opcode::DADD_reg); +} + +void TranslatorVisitor::DADD_cbuf(u64) { + 
ThrowNotImplemented(Opcode::DADD_cbuf); +} + +void TranslatorVisitor::DADD_imm(u64) { + ThrowNotImplemented(Opcode::DADD_imm); +} + +void TranslatorVisitor::DEPBAR(u64) { + ThrowNotImplemented(Opcode::DEPBAR); +} + +void TranslatorVisitor::DFMA_reg(u64) { + ThrowNotImplemented(Opcode::DFMA_reg); +} + +void TranslatorVisitor::DFMA_rc(u64) { + ThrowNotImplemented(Opcode::DFMA_rc); +} + +void TranslatorVisitor::DFMA_cr(u64) { + ThrowNotImplemented(Opcode::DFMA_cr); +} + +void TranslatorVisitor::DFMA_imm(u64) { + ThrowNotImplemented(Opcode::DFMA_imm); +} + +void TranslatorVisitor::DMNMX_reg(u64) { + ThrowNotImplemented(Opcode::DMNMX_reg); +} + +void TranslatorVisitor::DMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::DMNMX_cbuf); +} + +void TranslatorVisitor::DMNMX_imm(u64) { + ThrowNotImplemented(Opcode::DMNMX_imm); +} + +void TranslatorVisitor::DMUL_reg(u64) { + ThrowNotImplemented(Opcode::DMUL_reg); +} + +void TranslatorVisitor::DMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::DMUL_cbuf); +} + +void TranslatorVisitor::DMUL_imm(u64) { + ThrowNotImplemented(Opcode::DMUL_imm); +} + +void TranslatorVisitor::DSET_reg(u64) { + ThrowNotImplemented(Opcode::DSET_reg); +} + +void TranslatorVisitor::DSET_cbuf(u64) { + ThrowNotImplemented(Opcode::DSET_cbuf); +} + +void TranslatorVisitor::DSET_imm(u64) { + ThrowNotImplemented(Opcode::DSET_imm); +} + +void TranslatorVisitor::DSETP_reg(u64) { + ThrowNotImplemented(Opcode::DSETP_reg); +} + +void TranslatorVisitor::DSETP_cbuf(u64) { + ThrowNotImplemented(Opcode::DSETP_cbuf); +} + +void TranslatorVisitor::DSETP_imm(u64) { + ThrowNotImplemented(Opcode::DSETP_imm); +} + +void TranslatorVisitor::EXIT(u64) { + throw LogicError("Visting EXIT instruction"); +} + +void TranslatorVisitor::F2F_reg(u64) { + ThrowNotImplemented(Opcode::F2F_reg); +} + +void TranslatorVisitor::F2F_cbuf(u64) { + ThrowNotImplemented(Opcode::F2F_cbuf); +} + +void TranslatorVisitor::F2F_imm(u64) { + ThrowNotImplemented(Opcode::F2F_imm); +} + +void 
TranslatorVisitor::FADD_reg(u64) { + ThrowNotImplemented(Opcode::FADD_reg); +} + +void TranslatorVisitor::FADD_cbuf(u64) { + ThrowNotImplemented(Opcode::FADD_cbuf); +} + +void TranslatorVisitor::FADD_imm(u64) { + ThrowNotImplemented(Opcode::FADD_imm); +} + +void TranslatorVisitor::FADD32I(u64) { + ThrowNotImplemented(Opcode::FADD32I); +} + +void TranslatorVisitor::FCHK_reg(u64) { + ThrowNotImplemented(Opcode::FCHK_reg); +} + +void TranslatorVisitor::FCHK_cbuf(u64) { + ThrowNotImplemented(Opcode::FCHK_cbuf); +} + +void TranslatorVisitor::FCHK_imm(u64) { + ThrowNotImplemented(Opcode::FCHK_imm); +} + +void TranslatorVisitor::FCMP_reg(u64) { + ThrowNotImplemented(Opcode::FCMP_reg); +} + +void TranslatorVisitor::FCMP_rc(u64) { + ThrowNotImplemented(Opcode::FCMP_rc); +} + +void TranslatorVisitor::FCMP_cr(u64) { + ThrowNotImplemented(Opcode::FCMP_cr); +} + +void TranslatorVisitor::FCMP_imm(u64) { + ThrowNotImplemented(Opcode::FCMP_imm); +} + +void TranslatorVisitor::FFMA_reg(u64) { + ThrowNotImplemented(Opcode::FFMA_reg); +} + +void TranslatorVisitor::FFMA_rc(u64) { + ThrowNotImplemented(Opcode::FFMA_rc); +} + +void TranslatorVisitor::FFMA_cr(u64) { + ThrowNotImplemented(Opcode::FFMA_cr); +} + +void TranslatorVisitor::FFMA_imm(u64) { + ThrowNotImplemented(Opcode::FFMA_imm); +} + +void TranslatorVisitor::FFMA32I(u64) { + ThrowNotImplemented(Opcode::FFMA32I); +} + +void TranslatorVisitor::FLO_reg(u64) { + ThrowNotImplemented(Opcode::FLO_reg); +} + +void TranslatorVisitor::FLO_cbuf(u64) { + ThrowNotImplemented(Opcode::FLO_cbuf); +} + +void TranslatorVisitor::FLO_imm(u64) { + ThrowNotImplemented(Opcode::FLO_imm); +} + +void TranslatorVisitor::FMNMX_reg(u64) { + ThrowNotImplemented(Opcode::FMNMX_reg); +} + +void TranslatorVisitor::FMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::FMNMX_cbuf); +} + +void TranslatorVisitor::FMNMX_imm(u64) { + ThrowNotImplemented(Opcode::FMNMX_imm); +} + +void TranslatorVisitor::FMUL_reg(u64) { + ThrowNotImplemented(Opcode::FMUL_reg); +} + +void 
TranslatorVisitor::FMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::FMUL_cbuf); +} + +void TranslatorVisitor::FMUL_imm(u64) { + ThrowNotImplemented(Opcode::FMUL_imm); +} + +void TranslatorVisitor::FMUL32I(u64) { + ThrowNotImplemented(Opcode::FMUL32I); +} + +void TranslatorVisitor::FSET_reg(u64) { + ThrowNotImplemented(Opcode::FSET_reg); +} + +void TranslatorVisitor::FSET_cbuf(u64) { + ThrowNotImplemented(Opcode::FSET_cbuf); +} + +void TranslatorVisitor::FSET_imm(u64) { + ThrowNotImplemented(Opcode::FSET_imm); +} + +void TranslatorVisitor::FSETP_reg(u64) { + ThrowNotImplemented(Opcode::FSETP_reg); +} + +void TranslatorVisitor::FSETP_cbuf(u64) { + ThrowNotImplemented(Opcode::FSETP_cbuf); +} + +void TranslatorVisitor::FSETP_imm(u64) { + ThrowNotImplemented(Opcode::FSETP_imm); +} + +void TranslatorVisitor::FSWZADD(u64) { + ThrowNotImplemented(Opcode::FSWZADD); +} + +void TranslatorVisitor::GETCRSPTR(u64) { + ThrowNotImplemented(Opcode::GETCRSPTR); +} + +void TranslatorVisitor::GETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::GETLMEMBASE); +} + +void TranslatorVisitor::HADD2_reg(u64) { + ThrowNotImplemented(Opcode::HADD2_reg); +} + +void TranslatorVisitor::HADD2_cbuf(u64) { + ThrowNotImplemented(Opcode::HADD2_cbuf); +} + +void TranslatorVisitor::HADD2_imm(u64) { + ThrowNotImplemented(Opcode::HADD2_imm); +} + +void TranslatorVisitor::HADD2_32I(u64) { + ThrowNotImplemented(Opcode::HADD2_32I); +} + +void TranslatorVisitor::HFMA2_reg(u64) { + ThrowNotImplemented(Opcode::HFMA2_reg); +} + +void TranslatorVisitor::HFMA2_rc(u64) { + ThrowNotImplemented(Opcode::HFMA2_rc); +} + +void TranslatorVisitor::HFMA2_cr(u64) { + ThrowNotImplemented(Opcode::HFMA2_cr); +} + +void TranslatorVisitor::HFMA2_imm(u64) { + ThrowNotImplemented(Opcode::HFMA2_imm); +} + +void TranslatorVisitor::HFMA2_32I(u64) { + ThrowNotImplemented(Opcode::HFMA2_32I); +} + +void TranslatorVisitor::HMUL2_reg(u64) { + ThrowNotImplemented(Opcode::HMUL2_reg); +} + +void TranslatorVisitor::HMUL2_cbuf(u64) { + 
ThrowNotImplemented(Opcode::HMUL2_cbuf); +} + +void TranslatorVisitor::HMUL2_imm(u64) { + ThrowNotImplemented(Opcode::HMUL2_imm); +} + +void TranslatorVisitor::HMUL2_32I(u64) { + ThrowNotImplemented(Opcode::HMUL2_32I); +} + +void TranslatorVisitor::HSET2_reg(u64) { + ThrowNotImplemented(Opcode::HSET2_reg); +} + +void TranslatorVisitor::HSET2_cbuf(u64) { + ThrowNotImplemented(Opcode::HSET2_cbuf); +} + +void TranslatorVisitor::HSET2_imm(u64) { + ThrowNotImplemented(Opcode::HSET2_imm); +} + +void TranslatorVisitor::HSETP2_reg(u64) { + ThrowNotImplemented(Opcode::HSETP2_reg); +} + +void TranslatorVisitor::HSETP2_cbuf(u64) { + ThrowNotImplemented(Opcode::HSETP2_cbuf); +} + +void TranslatorVisitor::HSETP2_imm(u64) { + ThrowNotImplemented(Opcode::HSETP2_imm); +} + +void TranslatorVisitor::I2F_reg(u64) { + ThrowNotImplemented(Opcode::I2F_reg); +} + +void TranslatorVisitor::I2F_cbuf(u64) { + ThrowNotImplemented(Opcode::I2F_cbuf); +} + +void TranslatorVisitor::I2F_imm(u64) { + ThrowNotImplemented(Opcode::I2F_imm); +} + +void TranslatorVisitor::I2I_reg(u64) { + ThrowNotImplemented(Opcode::I2I_reg); +} + +void TranslatorVisitor::I2I_cbuf(u64) { + ThrowNotImplemented(Opcode::I2I_cbuf); +} + +void TranslatorVisitor::I2I_imm(u64) { + ThrowNotImplemented(Opcode::I2I_imm); +} + +void TranslatorVisitor::IADD_reg(u64) { + ThrowNotImplemented(Opcode::IADD_reg); +} + +void TranslatorVisitor::IADD_cbuf(u64) { + ThrowNotImplemented(Opcode::IADD_cbuf); +} + +void TranslatorVisitor::IADD_imm(u64) { + ThrowNotImplemented(Opcode::IADD_imm); +} + +void TranslatorVisitor::IADD3_reg(u64) { + ThrowNotImplemented(Opcode::IADD3_reg); +} + +void TranslatorVisitor::IADD3_cbuf(u64) { + ThrowNotImplemented(Opcode::IADD3_cbuf); +} + +void TranslatorVisitor::IADD3_imm(u64) { + ThrowNotImplemented(Opcode::IADD3_imm); +} + +void TranslatorVisitor::IADD32I(u64) { + ThrowNotImplemented(Opcode::IADD32I); +} + +void TranslatorVisitor::ICMP_reg(u64) { + ThrowNotImplemented(Opcode::ICMP_reg); +} + +void 
TranslatorVisitor::ICMP_rc(u64) { + ThrowNotImplemented(Opcode::ICMP_rc); +} + +void TranslatorVisitor::ICMP_cr(u64) { + ThrowNotImplemented(Opcode::ICMP_cr); +} + +void TranslatorVisitor::ICMP_imm(u64) { + ThrowNotImplemented(Opcode::ICMP_imm); +} + +void TranslatorVisitor::IDE(u64) { + ThrowNotImplemented(Opcode::IDE); +} + +void TranslatorVisitor::IDP_reg(u64) { + ThrowNotImplemented(Opcode::IDP_reg); +} + +void TranslatorVisitor::IDP_imm(u64) { + ThrowNotImplemented(Opcode::IDP_imm); +} + +void TranslatorVisitor::IMAD_reg(u64) { + ThrowNotImplemented(Opcode::IMAD_reg); +} + +void TranslatorVisitor::IMAD_rc(u64) { + ThrowNotImplemented(Opcode::IMAD_rc); +} + +void TranslatorVisitor::IMAD_cr(u64) { + ThrowNotImplemented(Opcode::IMAD_cr); +} + +void TranslatorVisitor::IMAD_imm(u64) { + ThrowNotImplemented(Opcode::IMAD_imm); +} + +void TranslatorVisitor::IMAD32I(u64) { + ThrowNotImplemented(Opcode::IMAD32I); +} + +void TranslatorVisitor::IMADSP_reg(u64) { + ThrowNotImplemented(Opcode::IMADSP_reg); +} + +void TranslatorVisitor::IMADSP_rc(u64) { + ThrowNotImplemented(Opcode::IMADSP_rc); +} + +void TranslatorVisitor::IMADSP_cr(u64) { + ThrowNotImplemented(Opcode::IMADSP_cr); +} + +void TranslatorVisitor::IMADSP_imm(u64) { + ThrowNotImplemented(Opcode::IMADSP_imm); +} + +void TranslatorVisitor::IMNMX_reg(u64) { + ThrowNotImplemented(Opcode::IMNMX_reg); +} + +void TranslatorVisitor::IMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::IMNMX_cbuf); +} + +void TranslatorVisitor::IMNMX_imm(u64) { + ThrowNotImplemented(Opcode::IMNMX_imm); +} + +void TranslatorVisitor::IMUL_reg(u64) { + ThrowNotImplemented(Opcode::IMUL_reg); +} + +void TranslatorVisitor::IMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::IMUL_cbuf); +} + +void TranslatorVisitor::IMUL_imm(u64) { + ThrowNotImplemented(Opcode::IMUL_imm); +} + +void TranslatorVisitor::IMUL32I(u64) { + ThrowNotImplemented(Opcode::IMUL32I); +} + +void TranslatorVisitor::ISBERD(u64) { + ThrowNotImplemented(Opcode::ISBERD); +} + +void 
TranslatorVisitor::ISCADD_reg(u64) { + ThrowNotImplemented(Opcode::ISCADD_reg); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + ThrowNotImplemented(Opcode::ISCADD_cbuf); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + ThrowNotImplemented(Opcode::ISCADD_imm); +} + +void TranslatorVisitor::ISCADD32I(u64) { + ThrowNotImplemented(Opcode::ISCADD32I); +} + +void TranslatorVisitor::ISET_reg(u64) { + ThrowNotImplemented(Opcode::ISET_reg); +} + +void TranslatorVisitor::ISET_cbuf(u64) { + ThrowNotImplemented(Opcode::ISET_cbuf); +} + +void TranslatorVisitor::ISET_imm(u64) { + ThrowNotImplemented(Opcode::ISET_imm); +} + +void TranslatorVisitor::ISETP_reg(u64) { + ThrowNotImplemented(Opcode::ISETP_reg); +} + +void TranslatorVisitor::ISETP_cbuf(u64) { + ThrowNotImplemented(Opcode::ISETP_cbuf); +} + +void TranslatorVisitor::ISETP_imm(u64) { + ThrowNotImplemented(Opcode::ISETP_imm); +} + +void TranslatorVisitor::JCAL(u64) { + ThrowNotImplemented(Opcode::JCAL); +} + +void TranslatorVisitor::JMP(u64) { + ThrowNotImplemented(Opcode::JMP); +} + +void TranslatorVisitor::JMX(u64) { + ThrowNotImplemented(Opcode::JMX); +} + +void TranslatorVisitor::KIL(u64) { + ThrowNotImplemented(Opcode::KIL); +} + +void TranslatorVisitor::LD(u64) { + ThrowNotImplemented(Opcode::LD); +} + +void TranslatorVisitor::LDC(u64) { + ThrowNotImplemented(Opcode::LDC); +} + +void TranslatorVisitor::LDG(u64) { + ThrowNotImplemented(Opcode::LDG); +} + +void TranslatorVisitor::LDL(u64) { + ThrowNotImplemented(Opcode::LDL); +} + +void TranslatorVisitor::LDS(u64) { + ThrowNotImplemented(Opcode::LDS); +} + +void TranslatorVisitor::LEA_hi_reg(u64) { + ThrowNotImplemented(Opcode::LEA_hi_reg); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64) { + ThrowNotImplemented(Opcode::LEA_hi_cbuf); +} + +void TranslatorVisitor::LEA_lo_reg(u64) { + ThrowNotImplemented(Opcode::LEA_lo_reg); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64) { + ThrowNotImplemented(Opcode::LEA_lo_cbuf); +} + +void TranslatorVisitor::LEA_lo_imm(u64) { + 
ThrowNotImplemented(Opcode::LEA_lo_imm); +} + +void TranslatorVisitor::LEPC(u64) { + ThrowNotImplemented(Opcode::LEPC); +} + +void TranslatorVisitor::LONGJMP(u64) { + ThrowNotImplemented(Opcode::LONGJMP); +} + +void TranslatorVisitor::LOP_reg(u64) { + ThrowNotImplemented(Opcode::LOP_reg); +} + +void TranslatorVisitor::LOP_cbuf(u64) { + ThrowNotImplemented(Opcode::LOP_cbuf); +} + +void TranslatorVisitor::LOP_imm(u64) { + ThrowNotImplemented(Opcode::LOP_imm); +} + +void TranslatorVisitor::LOP3_reg(u64) { + ThrowNotImplemented(Opcode::LOP3_reg); +} + +void TranslatorVisitor::LOP3_cbuf(u64) { + ThrowNotImplemented(Opcode::LOP3_cbuf); +} + +void TranslatorVisitor::LOP3_imm(u64) { + ThrowNotImplemented(Opcode::LOP3_imm); +} + +void TranslatorVisitor::LOP32I(u64) { + ThrowNotImplemented(Opcode::LOP32I); +} + +void TranslatorVisitor::MEMBAR(u64) { + ThrowNotImplemented(Opcode::MEMBAR); +} + +void TranslatorVisitor::MOV32I(u64) { + ThrowNotImplemented(Opcode::MOV32I); +} + +void TranslatorVisitor::NOP(u64) { + ThrowNotImplemented(Opcode::NOP); +} + +void TranslatorVisitor::OUT_reg(u64) { + ThrowNotImplemented(Opcode::OUT_reg); +} + +void TranslatorVisitor::OUT_cbuf(u64) { + ThrowNotImplemented(Opcode::OUT_cbuf); +} + +void TranslatorVisitor::OUT_imm(u64) { + ThrowNotImplemented(Opcode::OUT_imm); +} + +void TranslatorVisitor::P2R_reg(u64) { + ThrowNotImplemented(Opcode::P2R_reg); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + ThrowNotImplemented(Opcode::P2R_cbuf); +} + +void TranslatorVisitor::P2R_imm(u64) { + ThrowNotImplemented(Opcode::P2R_imm); +} + +void TranslatorVisitor::PBK(u64) { + // PBK is a no-op +} + +void TranslatorVisitor::PCNT(u64) { + ThrowNotImplemented(Opcode::PCNT); +} + +void TranslatorVisitor::PEXIT(u64) { + ThrowNotImplemented(Opcode::PEXIT); +} + +void TranslatorVisitor::PIXLD(u64) { + ThrowNotImplemented(Opcode::PIXLD); +} + +void TranslatorVisitor::PLONGJMP(u64) { + ThrowNotImplemented(Opcode::PLONGJMP); +} + +void TranslatorVisitor::POPC_reg(u64) { 
+ ThrowNotImplemented(Opcode::POPC_reg); +} + +void TranslatorVisitor::POPC_cbuf(u64) { + ThrowNotImplemented(Opcode::POPC_cbuf); +} + +void TranslatorVisitor::POPC_imm(u64) { + ThrowNotImplemented(Opcode::POPC_imm); +} + +void TranslatorVisitor::PRET(u64) { + ThrowNotImplemented(Opcode::PRET); +} + +void TranslatorVisitor::PRMT_reg(u64) { + ThrowNotImplemented(Opcode::PRMT_reg); +} + +void TranslatorVisitor::PRMT_rc(u64) { + ThrowNotImplemented(Opcode::PRMT_rc); +} + +void TranslatorVisitor::PRMT_cr(u64) { + ThrowNotImplemented(Opcode::PRMT_cr); +} + +void TranslatorVisitor::PRMT_imm(u64) { + ThrowNotImplemented(Opcode::PRMT_imm); +} + +void TranslatorVisitor::PSET(u64) { + ThrowNotImplemented(Opcode::PSET); +} + +void TranslatorVisitor::PSETP(u64) { + ThrowNotImplemented(Opcode::PSETP); +} + +void TranslatorVisitor::R2B(u64) { + ThrowNotImplemented(Opcode::R2B); +} + +void TranslatorVisitor::R2P_reg(u64) { + ThrowNotImplemented(Opcode::R2P_reg); +} + +void TranslatorVisitor::R2P_cbuf(u64) { + ThrowNotImplemented(Opcode::R2P_cbuf); +} + +void TranslatorVisitor::R2P_imm(u64) { + ThrowNotImplemented(Opcode::R2P_imm); +} + +void TranslatorVisitor::RAM(u64) { + ThrowNotImplemented(Opcode::RAM); +} + +void TranslatorVisitor::RED(u64) { + ThrowNotImplemented(Opcode::RED); +} + +void TranslatorVisitor::RET(u64) { + ThrowNotImplemented(Opcode::RET); +} + +void TranslatorVisitor::RRO_reg(u64) { + ThrowNotImplemented(Opcode::RRO_reg); +} + +void TranslatorVisitor::RRO_cbuf(u64) { + ThrowNotImplemented(Opcode::RRO_cbuf); +} + +void TranslatorVisitor::RRO_imm(u64) { + ThrowNotImplemented(Opcode::RRO_imm); +} + +void TranslatorVisitor::RTT(u64) { + ThrowNotImplemented(Opcode::RTT); +} + +void TranslatorVisitor::S2R(u64) { + ThrowNotImplemented(Opcode::S2R); +} + +void TranslatorVisitor::SAM(u64) { + ThrowNotImplemented(Opcode::SAM); +} + +void TranslatorVisitor::SEL_reg(u64) { + ThrowNotImplemented(Opcode::SEL_reg); +} + +void TranslatorVisitor::SEL_cbuf(u64) { + 
ThrowNotImplemented(Opcode::SEL_cbuf); +} + +void TranslatorVisitor::SEL_imm(u64) { + ThrowNotImplemented(Opcode::SEL_imm); +} + +void TranslatorVisitor::SETCRSPTR(u64) { + ThrowNotImplemented(Opcode::SETCRSPTR); +} + +void TranslatorVisitor::SETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::SETLMEMBASE); +} + +void TranslatorVisitor::SHF_l_reg(u64) { + ThrowNotImplemented(Opcode::SHF_l_reg); +} + +void TranslatorVisitor::SHF_l_imm(u64) { + ThrowNotImplemented(Opcode::SHF_l_imm); +} + +void TranslatorVisitor::SHF_r_reg(u64) { + ThrowNotImplemented(Opcode::SHF_r_reg); +} + +void TranslatorVisitor::SHF_r_imm(u64) { + ThrowNotImplemented(Opcode::SHF_r_imm); +} + +void TranslatorVisitor::SHFL(u64) { + ThrowNotImplemented(Opcode::SHFL); +} + +void TranslatorVisitor::SHL_reg(u64) { + ThrowNotImplemented(Opcode::SHL_reg); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + ThrowNotImplemented(Opcode::SHL_cbuf); +} + +void TranslatorVisitor::SHL_imm(u64) { + ThrowNotImplemented(Opcode::SHL_imm); +} + +void TranslatorVisitor::SHR_reg(u64) { + ThrowNotImplemented(Opcode::SHR_reg); +} + +void TranslatorVisitor::SHR_cbuf(u64) { + ThrowNotImplemented(Opcode::SHR_cbuf); +} + +void TranslatorVisitor::SHR_imm(u64) { + ThrowNotImplemented(Opcode::SHR_imm); +} + +void TranslatorVisitor::SSY(u64) { + ThrowNotImplemented(Opcode::SSY); +} + +void TranslatorVisitor::ST(u64) { + ThrowNotImplemented(Opcode::ST); +} + +void TranslatorVisitor::STL(u64) { + ThrowNotImplemented(Opcode::STL); +} + +void TranslatorVisitor::STP(u64) { + ThrowNotImplemented(Opcode::STP); +} + +void TranslatorVisitor::STS(u64) { + ThrowNotImplemented(Opcode::STS); +} + +void TranslatorVisitor::SUATOM_cas(u64) { + ThrowNotImplemented(Opcode::SUATOM_cas); +} + +void TranslatorVisitor::SULD(u64) { + ThrowNotImplemented(Opcode::SULD); +} + +void TranslatorVisitor::SURED(u64) { + ThrowNotImplemented(Opcode::SURED); +} + +void TranslatorVisitor::SUST(u64) { + ThrowNotImplemented(Opcode::SUST); +} + +void 
TranslatorVisitor::SYNC(u64) { + ThrowNotImplemented(Opcode::SYNC); +} + +void TranslatorVisitor::TEX(u64) { + ThrowNotImplemented(Opcode::TEX); +} + +void TranslatorVisitor::TEX_b(u64) { + ThrowNotImplemented(Opcode::TEX_b); +} + +void TranslatorVisitor::TEXS(u64) { + ThrowNotImplemented(Opcode::TEXS); +} + +void TranslatorVisitor::TLD(u64) { + ThrowNotImplemented(Opcode::TLD); +} + +void TranslatorVisitor::TLD_b(u64) { + ThrowNotImplemented(Opcode::TLD_b); +} + +void TranslatorVisitor::TLD4(u64) { + ThrowNotImplemented(Opcode::TLD4); +} + +void TranslatorVisitor::TLD4_b(u64) { + ThrowNotImplemented(Opcode::TLD4_b); +} + +void TranslatorVisitor::TLD4S(u64) { + ThrowNotImplemented(Opcode::TLD4S); +} + +void TranslatorVisitor::TLDS(u64) { + ThrowNotImplemented(Opcode::TLDS); +} + +void TranslatorVisitor::TMML(u64) { + ThrowNotImplemented(Opcode::TMML); +} + +void TranslatorVisitor::TMML_b(u64) { + ThrowNotImplemented(Opcode::TMML_b); +} + +void TranslatorVisitor::TXA(u64) { + ThrowNotImplemented(Opcode::TXA); +} + +void TranslatorVisitor::TXD(u64) { + ThrowNotImplemented(Opcode::TXD); +} + +void TranslatorVisitor::TXD_b(u64) { + ThrowNotImplemented(Opcode::TXD_b); +} + +void TranslatorVisitor::TXQ(u64) { + ThrowNotImplemented(Opcode::TXQ); +} + +void TranslatorVisitor::TXQ_b(u64) { + ThrowNotImplemented(Opcode::TXQ_b); +} + +void TranslatorVisitor::VABSDIFF(u64) { + ThrowNotImplemented(Opcode::VABSDIFF); +} + +void TranslatorVisitor::VABSDIFF4(u64) { + ThrowNotImplemented(Opcode::VABSDIFF4); +} + +void TranslatorVisitor::VADD(u64) { + ThrowNotImplemented(Opcode::VADD); +} + +void TranslatorVisitor::VMAD(u64) { + ThrowNotImplemented(Opcode::VMAD); +} + +void TranslatorVisitor::VMNMX(u64) { + ThrowNotImplemented(Opcode::VMNMX); +} + +void TranslatorVisitor::VOTE(u64) { + ThrowNotImplemented(Opcode::VOTE); +} + +void TranslatorVisitor::VOTE_vtg(u64) { + ThrowNotImplemented(Opcode::VOTE_vtg); +} + +void TranslatorVisitor::VSET(u64) { + ThrowNotImplemented(Opcode::VSET); 
+} + +void TranslatorVisitor::VSETP(u64) { + ThrowNotImplemented(Opcode::VSETP); +} + +void TranslatorVisitor::VSHL(u64) { + ThrowNotImplemented(Opcode::VSHL); +} + +void TranslatorVisitor::VSHR(u64) { + ThrowNotImplemented(Opcode::VSHR); +} + +void TranslatorVisitor::XMAD_reg(u64) { + ThrowNotImplemented(Opcode::XMAD_reg); +} + +void TranslatorVisitor::XMAD_rc(u64) { + ThrowNotImplemented(Opcode::XMAD_rc); +} + +void TranslatorVisitor::XMAD_cr(u64) { + ThrowNotImplemented(Opcode::XMAD_cr); +} + +void TranslatorVisitor::XMAD_imm(u64) { + ThrowNotImplemented(Opcode::XMAD_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp new file mode 100644 index 000000000..7fa35ba3a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +union MOV { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> src_reg; + BitField<39, 4, u64> mask; +}; + +void CheckMask(MOV mov) { + if (mov.mask != 0xf) { + throw NotImplementedException("Non-full move mask"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, X(mov.src_reg)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetImm(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..66a306745 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
+template <auto visitor_method>
+static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
+    using MethodType = decltype(visitor_method);
+    if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
+        (visitor.*visitor_method)(pc, insn); // visitor method takes (pc, insn)
+    } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
+        (visitor.*visitor_method)(insn); // visitor method takes (insn) only
+    } else {
+        (visitor.*visitor_method)(); // visitor method takes no arguments
+    }
+}
+
+IR::Block Translate(Environment& env, const Flow::Block& flow_block) {
+    IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()};
+    TranslatorVisitor visitor{env, block};
+
+    const Location pc_end{flow_block.end};
+    Location pc{flow_block.begin};
+    while (pc != pc_end) { // one decoded instruction per iteration
+        const u64 insn{env.ReadInstruction(pc.Offset())};
+        const Opcode opcode{Decode(insn)};
+        switch (opcode) {
+#define INST(name, cute, mask)                                                                     \
+    case Opcode::name:                                                                             \
+        Invoke<&TranslatorVisitor::name>(visitor, pc, insn);                                       \
+        break;
+#include "shader_recompiler/frontend/maxwell/maxwell.inc"
+#undef INST // undefine the macro that was actually defined, so it does not leak
+        default:
+            throw LogicError("Invalid opcode {}", opcode);
+        }
+        ++pc;
+    }
+    return block;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..788742dea
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block);
+
+} // namespace Shader::Maxwell
-- 
cgit v1.2.3


From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Tue, 2 Feb 2021 21:07:00 -0300
Subject: shader: SSA and dominance

---
 .../frontend/maxwell/control_flow.cpp              | 130 ++++++++++++++++++++-
 .../frontend/maxwell/control_flow.h                |  44 ++++++-
 src/shader_recompiler/frontend/maxwell/program.cpp |  75 +++++++-----
 src/shader_recompiler/frontend/maxwell/program.h   |  11 +-
 .../frontend/maxwell/termination_code.cpp          |   7 ++
 .../frontend/maxwell/termination_code.h            |   1 +
 .../frontend/maxwell/translate/impl/impl.h         |   4 +-
 .../maxwell/translate/impl/not_implemented.cpp     |   6 +-
 8 files changed, 223 insertions(+), 55 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell')

diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index fc4dba826..21ee98137 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -36,6 +36,7 @@ static std::array Split(Block&& block, Location pc, BlockId new_id) {
             .cond{true},
             .branch_true{new_id},
             .branch_false{UNREACHABLE_BLOCK_ID},
+            .imm_predecessors{},
         },
         Block{
             .begin{pc},
@@ -46,6 +47,7 @@ static std::array Split(Block&& block, Location pc, BlockId new_id) {
             .cond{block.cond},
             .branch_true{block.branch_true},
             .branch_false{block.branch_false},
+            .imm_predecessors{},
         },
     };
 }
@@ -108,7 +110,7 @@ static bool HasFlowTest(Opcode opcode) {
     }
 }
 
-static std::string Name(const Block& block) {
+static std::string NameOf(const Block& block) {
     if (block.begin.IsVirtual()) {
         return fmt::format("\"Virtual {}\"", block.id);
     } else {
@@ -154,13 +156,127 @@ bool Block::Contains(Location pc) const noexcept {
 }
 
 Function::Function(Location start_address)
-    : entrypoint{start_address}, labels{Label{
+    : entrypoint{start_address}, labels{{
           .address{start_address},
           .block_id{0},
           .stack{},
       }} {}
 
+void Function::BuildBlocksMap() {
+    const size_t num_blocks{NumBlocks()};
+    blocks_map.resize(num_blocks);
+    for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
+        Block& block{blocks_data[block_index]};
+        blocks_map[block.id] = &block;
+    }
+}
+
+void Function::BuildImmediatePredecessors() {
+    for (const Block& block : blocks_data) {
+        if (block.branch_true != UNREACHABLE_BLOCK_ID) {
+            blocks_map[block.branch_true]->imm_predecessors.push_back(block.id);
+        }
+        if (block.branch_false != UNREACHABLE_BLOCK_ID) {
+            blocks_map[block.branch_false]->imm_predecessors.push_back(block.id);
+        }
+    }
+}
+
+void Function::BuildPostOrder() {
+    boost::container::small_vector block_stack; // NOTE(review): template args lost in extraction
+    post_order_map.resize(NumBlocks());
+
+    Block& first_block{blocks_data[blocks.front()]};
+    first_block.post_order_visited = true;
+    block_stack.push_back(first_block.id);
+
+    const auto visit_branch = [&](BlockId block_id, BlockId branch_id) {
+        if (branch_id == UNREACHABLE_BLOCK_ID) {
+            return false;
+        }
+        if (blocks_map[branch_id]->post_order_visited) {
+            return false;
+        }
+        blocks_map[branch_id]->post_order_visited = true;
+
+        // Calling push_back twice is faster than insert on msvc
+        block_stack.push_back(block_id);
+        block_stack.push_back(branch_id);
+        return true;
+    };
+    while (!block_stack.empty()) {
+        const Block* const block{blocks_map[block_stack.back()]};
+        block_stack.pop_back();
+
+        if (!visit_branch(block->id, block->branch_true) &&
+            !visit_branch(block->id, block->branch_false)) {
+            post_order_map[block->id] = static_cast(post_order_blocks.size()); // NOTE(review): cast type lost
+            post_order_blocks.push_back(block->id);
+        }
+    }
+}
+
+void Function::BuildImmediateDominators() {
+    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
+    auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id};
+    auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })};
+    auto intersect{[this](Block* finger1, Block* finger2) {
+        while (finger1 != finger2) {
+            while (post_order_map[finger1->id] < post_order_map[finger2->id]) {
+                finger1 = finger1->imm_dominator;
+            }
+            while (post_order_map[finger2->id] < post_order_map[finger1->id]) {
+                finger2 = finger2->imm_dominator;
+            }
+        }
+        return finger1;
+    }};
+    for (Block& block : blocks_data) {
+        block.imm_dominator = nullptr;
+    }
+    Block* const start_block{&blocks_data[blocks.front()]};
+    start_block->imm_dominator = start_block; // the entry block is its own immediate dominator
+
+    bool changed{true};
+    while (changed) { // repeat until no immediate dominator changes
+        changed = false;
+        for (Block* const block : post_order_blocks | reverse_order_but_first) {
+            Block* new_idom{};
+            for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) {
+                new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor;
+            }
+            changed |= block->imm_dominator != new_idom;
+            block->imm_dominator = new_idom;
+        }
+    }
+}
+
+void Function::BuildDominanceFrontier() {
+    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
+    auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }};
+    for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) {
+        for (Block* current : block.imm_predecessors | transform_block_id) {
+            while (current != block.imm_dominator) {
+                current->dominance_frontiers.push_back(block.id); // the join block, not current itself
+                current = current->imm_dominator;
+            }
+        }
+    }
+}
+
 CFG::CFG(Environment& env_, Location start_address) : env{env_} {
+    VisitFunctions(start_address);
+
+    for (Function& function : functions) {
+        function.BuildBlocksMap();
+        function.BuildImmediatePredecessors();
+        function.BuildPostOrder();
+        function.BuildImmediateDominators();
+        function.BuildDominanceFrontier();
+    }
+}
+
+void CFG::VisitFunctions(Location start_address) {
     functions.emplace_back(start_address);
     for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
         while (!functions[function_id].labels.empty()) {
@@ -202,6 +318,7 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
         .cond{true},
         .branch_true{UNREACHABLE_BLOCK_ID},
         .branch_false{UNREACHABLE_BLOCK_ID},
+        .imm_predecessors{},
     };
     // Analyze instructions until it reaches an already visited block or there's a branch
     bool is_branch{false};
@@ -310,7 +427,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
         // Technically CAL pushes into PRET, but that's implicit in the function call for us
         // Insert the function into the list if it doesn't exist
         if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) {
-            functions.push_back(cal_pc);
+            functions.emplace_back(cal_pc);
         }
         // Handle CAL like a regular instruction
         break;
@@ -352,6
+469,7 @@ void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, })}; // Set the end properties of the conditional instruction and give it a new identity Block& conditional_block{block}; @@ -465,14 +583,14 @@ std::string CFG::Dot() const { dot += fmt::format("\t\tnode [style=filled];\n"); for (const u32 block_index : function.blocks) { const Block& block{function.blocks_data[block_index]}; - const std::string name{Name(block)}; + const std::string name{NameOf(block)}; const auto add_branch = [&](BlockId branch_id, bool add_label) { const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; dot += fmt::format("\t\t{}->", name); if (it == function.blocks_data.end()) { dot += fmt::format("\"Unknown label {}\"", branch_id); } else { - dot += Name(*it); + dot += NameOf(*it); }; if (add_label && block.cond != true && block.cond != false) { dot += fmt::format(" [label=\"{}\"]", block.cond); @@ -520,7 +638,7 @@ std::string CFG::Dot() const { if (functions.front().blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index b2ab0cdc3..20ada8afd 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -70,6 +70,12 @@ struct Block { IR::Condition cond; BlockId branch_true; BlockId branch_false; + boost::container::small_vector imm_predecessors; + boost::container::small_vector dominance_frontiers; + union { + bool post_order_visited{false}; + Block* imm_dominator; + }; }; struct Label { @@ -81,11 +87,30 @@ struct Label 
{ struct Function { Function(Location start_address); + void BuildBlocksMap(); + + void BuildImmediatePredecessors(); + + void BuildPostOrder(); + + void BuildImmediateDominators(); + + void BuildDominanceFrontier(); + + [[nodiscard]] size_t NumBlocks() const noexcept { + return static_cast(current_block_id) + 1; + } + Location entrypoint; BlockId current_block_id{0}; boost::container::small_vector labels; boost::container::small_vector blocks; boost::container::small_vector blocks_data; + // Translates from BlockId to block index + boost::container::small_vector blocks_map; + + boost::container::small_vector post_order_blocks; + boost::container::small_vector post_order_map; }; class CFG { @@ -97,6 +122,12 @@ class CFG { public: explicit CFG(Environment& env, Location start_address); + CFG& operator=(const CFG&) = delete; + CFG(const CFG&) = delete; + + CFG& operator=(CFG&&) = delete; + CFG(CFG&&) = delete; + [[nodiscard]] std::string Dot() const; [[nodiscard]] std::span Functions() const noexcept { @@ -104,20 +135,22 @@ public: } private: + void VisitFunctions(Location start_address); + void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. 
/// Return true when the block has already been visited - [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, - Instruction inst, Opcode opcode); + bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); @@ -126,8 +159,7 @@ private: AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, - FunctionId function_id); + BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); Environment& env; boost::container::small_vector functions; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 67a98ba57..49d1f4bfb 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,40 +8,53 @@ #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { +namespace { +void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + 
std::span block_map, IR::Block* block_memory) { + const size_t num_blocks{cfg_function.blocks.size()}; + function.blocks.reserve(num_blocks); -Program::Function::~Function() { - std::ranges::for_each(blocks, &std::destroy_at); -} - -Program::Program(Environment& env, const Flow::CFG& cfg) { - std::vector block_map; - functions.reserve(cfg.Functions().size()); + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - for (const Flow::Function& cfg_function : cfg.Functions()) { - Function& function{functions.emplace_back()}; + function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); + block_map[flow_block.id] = function.blocks.back().get(); + ++block_memory; + } +} - const size_t num_blocks{cfg_function.blocks.size()}; - IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)}; - function.blocks.reserve(num_blocks); +void EmitTerminationInsts(const Flow::Function& cfg_function, + std::span block_map) { + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + EmitTerminationCode(flow_block, block_map); + } +} - block_map.resize(cfg_function.blocks_data.size()); +void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + IR::Block* block_memory) { + std::vector block_map; + block_map.resize(cfg_function.blocks_data.size()); - // Visit the instructions of all blocks - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + TranslateCode(env, cfg_function, function, block_map, block_memory); + EmitTerminationInsts(cfg_function, block_map); +} +} // Anonymous namespace - IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))}; - ++block_memory; - function.blocks.push_back(block); - block_map[flow_block.id] = block; - } - // Now that all blocks 
are defined, emit the termination instructions - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } +Program::Program(Environment& env, const Flow::CFG& cfg) { + functions.reserve(cfg.Functions().size()); + for (const Flow::Function& cfg_function : cfg.Functions()) { + TranslateFunction(env, cfg_function, functions.emplace_back(), + block_alloc_pool.allocate(cfg_function.blocks.size())); + } + std::ranges::for_each(functions, Optimization::SsaRewritePass); + for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); + Optimization::Invoke(Optimization::IdentityRemovalPass, function); + // Optimization::Invoke(Optimization::VerificationPass, function); } } @@ -50,16 +63,16 @@ std::string DumpProgram(const Program& program) { std::map inst_to_index; std::map block_to_index; - for (const Program::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); + for (const IR::Function& function : program.functions) { + for (const auto& block : function.blocks) { + block_to_index.emplace(block.get(), index); ++index; } } std::string ret; - for (const Program::Function& function : program.functions) { + for (const IR::Function& function : program.functions) { ret += fmt::format("Function\n"); - for (const IR::Block* const block : function.blocks) { + for (const auto& block : function.blocks) { ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } } diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 7814b2c01..36e678a9e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -4,13 +4,16 @@ #pragma once +#include #include #include +#include #include #include 
"shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" namespace Shader::Maxwell { @@ -22,16 +25,10 @@ public: explicit Program(Environment& env, const Flow::CFG& cfg); private: - struct Function { - ~Function(); - - std::vector blocks; - }; - boost::pool_allocator block_alloc_pool; - std::vector functions; + boost::container::small_vector functions; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp index a4ea5c5e3..ed5137f20 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp @@ -47,12 +47,19 @@ static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { static void EmitBranch(const Flow::Block& flow_block, std::span block_map, IR::IREmitter& ir) { + const auto add_immediate_predecessor = [&](Flow::BlockId label) { + block_map[label]->AddImmediatePredecessor(&ir.block); + }; if (flow_block.cond == true) { + add_immediate_predecessor(flow_block.branch_true); return ir.Branch(block_map[flow_block.branch_true]); } if (flow_block.cond == false) { + add_immediate_predecessor(flow_block.branch_false); return ir.Branch(block_map[flow_block.branch_false]); } + add_immediate_predecessor(flow_block.branch_true); + add_immediate_predecessor(flow_block.branch_false); return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], block_map[flow_block.branch_false]); } diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h index b0d667942..04e044534 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ b/src/shader_recompiler/frontend/maxwell/termination_code.h @@ -11,6 
+11,7 @@ namespace Shader::Maxwell { +/// Emit termination instructions and collect immediate predecessors void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bc607b002..8be7d6ff1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -208,7 +208,7 @@ public: void P2R_reg(u64 insn); void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); - void PBK(u64 insn); + void PBK(); void PCNT(u64 insn); void PEXIT(u64 insn); void PIXLD(u64 insn); @@ -252,7 +252,7 @@ public: void SHR_reg(u64 insn); void SHR_cbuf(u64 insn); void SHR_imm(u64 insn); - void SSY(u64 insn); + void SSY(); void ST(u64 insn); void STG(u64 insn); void STL(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c907c1ffb..0f52696d1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -762,7 +762,7 @@ void TranslatorVisitor::P2R_imm(u64) { ThrowNotImplemented(Opcode::P2R_imm); } -void TranslatorVisitor::PBK(u64) { +void TranslatorVisitor::PBK() { // PBK is a no-op } @@ -938,8 +938,8 @@ void TranslatorVisitor::SHR_imm(u64) { ThrowNotImplemented(Opcode::SHR_imm); } -void TranslatorVisitor::SSY(u64) { - ThrowNotImplemented(Opcode::SSY); +void TranslatorVisitor::SSY() { + // SSY is a no-op } void TranslatorVisitor::ST(u64) { -- cgit v1.2.3 From d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Feb 2021 16:43:04 -0300 Subject: shader: Initial instruction support --- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + 
.../maxwell/translate/impl/common_encoding.h | 56 ++++++++ .../maxwell/translate/impl/floating_point_add.cpp | 71 ++++++++++ .../impl/floating_point_fused_multiply_add.cpp | 73 ++++++++++ .../translate/impl/floating_point_multiply.cpp | 108 +++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 26 +++- .../frontend/maxwell/translate/impl/impl.h | 9 +- .../maxwell/translate/impl/integer_add.cpp | 106 +++++++++++++++ .../maxwell/translate/impl/integer_scaled_add.cpp | 73 ++++++++++ .../translate/impl/integer_set_predicate.cpp | 99 ++++++++++++++ .../maxwell/translate/impl/integer_shift_left.cpp | 71 ++++++++++ .../translate/impl/integer_short_multiply_add.cpp | 110 +++++++++++++++ .../maxwell/translate/impl/load_store_memory.cpp | 149 +++++++++++++++++---- .../maxwell/translate/impl/move_register.cpp | 45 +++++++ .../translate/impl/move_special_register.cpp | 114 ++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 149 +-------------------- .../maxwell/translate/impl/register_move.cpp | 45 ------- 17 files changed, 1083 insertions(+), 222 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp create mode 100644 
src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 49d1f4bfb..bd1f96c07 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::Invoke(Optimization::IdentityRemovalPass, function); // Optimization::Invoke(Optimization::VerificationPass, function); } + //*/ } std::string DumpProgram(const Program& program) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..3da37a2bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + return IR::FmzMode::FMZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..d2c44b9cc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (sat) { + throw NotImplementedException("FADD SAT"); + } + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64) { + throw NotImplementedException("FADD (cbuf)"); +} + +void TranslatorVisitor::FADD_imm(u64) { + throw NotImplementedException("FADD (imm)"); +} + +void TranslatorVisitor::FADD32I(u64) { + throw NotImplementedException("FADD32I"); +} + +} // namespace Shader::Maxwell diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..30ca052ec --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (sat) { + throw NotImplementedException("FFMA SAT"); + } + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 
0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64) { + throw NotImplementedException("FFMA (rc)"); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64) { + throw NotImplementedException("FFMA (imm)"); +} + +void TranslatorVisitor::FFMA32I(u64) { + throw NotImplementedException("FFMA32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..743a1e2f0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + if (sat) { + throw NotImplementedException("FMUL SAT"); + } + IR::U32 op_a{v.X(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, 
u64> neg_b; + BitField<50, 1, u64> sat; + } fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64) { + throw NotImplementedException("FMUL (cbuf)"); +} + +void TranslatorVisitor::FMUL_imm(u64) { + throw NotImplementedException("FMUL (imm)"); +} + +void TranslatorVisitor::FMUL32I(u64) { + throw NotImplementedException("FMUL32I"); +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7bc7ce9f2..548c7f611 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::U32 TranslatorVisitor::GetImm(u64 insn) { +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; BitField<20, 19, u64> value; @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { return ir.Imm32(value); } +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast(imm.value)); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { 
ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8be7d6ff1..ef6d977fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -46,7 +46,7 @@ public: void DADD_reg(u64 insn); void DADD_cbuf(u64 insn); void DADD_imm(u64 insn); - void DEPBAR(u64 insn); + void DEPBAR(); void DFMA_reg(u64 insn); void DFMA_rc(u64 insn); void DFMA_cr(u64 insn); @@ -298,9 +298,14 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::U32 GetImm(u64 insn); + [[nodiscard]] IR::U32 GetImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..60f79b160 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,106 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? + if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? 
+ if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + const bool neg_a{!po && iadd.neg_a != 0}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64) { + throw NotImplementedException("IADD (reg)"); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64) { + throw NotImplementedException("IADD (imm)"); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..f92c0bbd6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
+// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + const bool po{iscadd.three_for_po == 3}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (!po) { + // When PO is not present, the bits are interpreted as negation + if (iscadd.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iscadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + if (po) { + // .PO adds one to the final result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + v.X(iscadd.dest_reg, result); + + if (iscadd.cc != 0) { + throw NotImplementedException("ISCADD CC"); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> op_b; + } const iscadd{insn}; + + ISCADD(*this, insn, X(iscadd.op_b)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + throw NotImplementedException("ISCADD (cbuf)"); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + throw NotImplementedException("ISCADD (imm)"); +} + +void TranslatorVisitor::ISCADD32I(u64) { + throw NotImplementedException("ISCADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 
000000000..76c6b5291 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class CompareOp : u64 { + F, // Always false + LT, // Less than + EQ, // Equal + LE, // Less than or equal + GT, // Greater than + NE, // Not equal + GE, // Greater than or equal + T, // Always true +}; + +enum class Bop : u64 { + AND, + OR, + XOR, +}; + +IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, + bool is_signed) { + switch (op) { + case CompareOp::F: + return ir.Imm1(false); + case CompareOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case CompareOp::EQ: + return ir.IEqual(lhs, rhs); + case CompareOp::LE: + return ir.ILessThanEqual(lhs, rhs, is_signed); + case CompareOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case CompareOp::NE: + return ir.INotEqual(lhs, rhs); + case CompareOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case CompareOp::T: + return ir.Imm1(true); + } + throw NotImplementedException("Invalid ISETP compare op {}", op); +} + +IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { + switch (bop) { + case Bop::AND: + return ir.LogicalAnd(comparison, bop_pred); + case Bop::OR: + return ir.LogicalOr(comparison, bop_pred); + case Bop::XOR: + return ir.LogicalXor(comparison, bop_pred); + } + throw NotImplementedException("Invalid ISETP bop {}", bop); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> 
bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, Bop> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const Bop bop{isetp.bop}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; + const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64) { + throw NotImplementedException("ISETP_imm"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..d4b417d14 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to clamp it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." + // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. 
+ // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64) { + throw NotImplementedException("SHL_reg"); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + throw NotImplementedException("SHL_cbuf"); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..70a7c76c5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 
16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. 
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64) { + throw NotImplementedException("XMAD (reg)"); +} + +void TranslatorVisitor::XMAD_rc(u64) { + throw NotImplementedException("XMAD (rc)"); +} + +void TranslatorVisitor::XMAD_cr(u64) { + throw NotImplementedException("XMAD (cr)"); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<39, 8, IR::Reg> src_c; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + const IR::U32 src_b{ir.Imm32(static_cast(xmad.src_b))}; + const IR::U32 src_c{X(xmad.src_c)}; + XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, + xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index d8fd387cf..c9669c617 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -10,16 +10,35 @@ namespace Shader::Maxwell { namespace { +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, + U128, // ??? 
+}; + enum class StoreSize : u64 { - U8, - S8, - U16, - S16, + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend B32, B64, B128, }; +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; + // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html enum class StoreCache : u64 { WB, // Cache write-back all coherent levels @@ -27,61 +46,137 @@ enum class StoreCache : u64 { CS, // Cache streaming, likely to be accessed once WT, // Cache write-through (to system memory) }; -} // Anonymous namespace -void TranslatorVisitor::STG(u64 insn) { - // STG stores registers into global memory. +IR::U64 Address(TranslatorVisitor& v, u64 insn) { union { u64 raw; - BitField<0, 8, IR::Reg> data_reg; BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; BitField<45, 1, u64> e; - BitField<46, 2, StoreCache> cache; - BitField<48, 3, StoreSize> size; - } const stg{insn}; + } const mem{insn}; const IR::U64 address{[&]() -> IR::U64 { - if (stg.e == 0) { - // STG without .E uses a 32-bit pointer, zero-extend it - return ir.ConvertU(64, X(stg.addr_reg)); + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.ConvertU(64, v.X(mem.addr_reg)); } - if (!IR::IsAligned(stg.addr_reg, 2)) { + if (!IR::IsAligned(mem.addr_reg, 2)) { throw NotImplementedException("Unaligned address register"); } - // Pack two registers to build the 32-bit address - return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), 
v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast(mem.rz_addr_offset.Value()); + } else { + return static_cast(mem.addr_offset.Value()); + } }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::B128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::U128: + throw NotImplementedException("LDG U.128"); + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. 
+ union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; switch (stg.size) { case StoreSize::U8: - ir.WriteGlobalU8(address, X(stg.data_reg)); + ir.WriteGlobalU8(address, X(data_reg)); break; case StoreSize::S8: - ir.WriteGlobalS8(address, X(stg.data_reg)); + ir.WriteGlobalS8(address, X(data_reg)); break; case StoreSize::U16: - ir.WriteGlobalU16(address, X(stg.data_reg)); + ir.WriteGlobalU16(address, X(data_reg)); break; case StoreSize::S16: - ir.WriteGlobalS16(address, X(stg.data_reg)); + ir.WriteGlobalS16(address, X(data_reg)); break; case StoreSize::B32: - ir.WriteGlobal32(address, X(stg.data_reg)); + ir.WriteGlobal32(address, X(data_reg)); break; case StoreSize::B64: { - if (!IR::IsAligned(stg.data_reg, 2)) { + if (!IR::IsAligned(data_reg, 2)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; ir.WriteGlobal64(address, vector); break; } case StoreSize::B128: - if (!IR::IsAligned(stg.data_reg, 4)) { + if (!IR::IsAligned(data_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), - X(stg.data_reg + 2), X(stg.data_reg + 3))}; + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; ir.WriteGlobal128(address, vector); break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..1711d3f48 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ 
-0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +union MOV { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> src_reg; + BitField<39, 4, u64> mask; +}; + +void CheckMask(MOV mov) { + if (mov.mask != 0xf) { + throw NotImplementedException("Non-full move mask"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, X(mov.src_reg)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..93cea302a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,114 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 49, + SR_CirQueueIncrMinusOne = 50, + SR_NLATC = 51, + SR_SWINLO = 57, + SR_SWINSZ = 58, + SR_SMEMSZ = 59, + SR_SMEMBANKS = 60, + SR_LWINLO = 61, + SR_LWINSZ = 62, + SR_LMEMLOSZ = 63, + SR_LMEMHIOFF = 64, + SR_EQMASK = 65, + SR_LTMASK = 66, + SR_LEMASK = 67, + SR_GTMASK = 68, + SR_GEMASK = 69, + SR_REGALLOC = 70, + SR_GLOBALERRORSTATUS = 73, + SR_WARPERRORSTATUS = 75, + SR_PM_HI0 = 81, + SR_PM_HI1 = 82, + SR_PM_HI2 = 83, + SR_PM_HI3 = 84, + SR_PM_HI4 = 85, + SR_PM_HI5 = 86, + SR_PM_HI6 = 87, + SR_PM_HI7 = 88, + SR_CLOCKLO = 89, + SR_CLOCKHI = 90, + SR_GLOBALTIMERLO = 91, + SR_GLOBALTIMERHI = 92, + SR_HWTASKID = 105, + SR_CIRCULARQUEUEENTRYINDEX = 106, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + 
return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0f52696d1..d70399f6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -7,21 +7,8 @@ #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#include "shader_recompiler/ir_opt/passes.h" - namespace Shader::Maxwell { -[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { - auto raw{IR::DumpBlock(block)}; - - Optimization::GetSetElimination(block); - Optimization::DeadCodeEliminationPass(block); - Optimization::IdentityRemovalPass(block); - auto dumped{IR::DumpBlock(block)}; - - fmt::print(stderr, "{}", dumped); -} - [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { throw NotImplementedException("Instruction {} is not implemented", opcode); } @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { ThrowNotImplemented(Opcode::DADD_imm); } -void TranslatorVisitor::DEPBAR(u64) { - ThrowNotImplemented(Opcode::DEPBAR); +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op } void TranslatorVisitor::DFMA_reg(u64) { @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { 
ThrowNotImplemented(Opcode::F2F_imm); } -void TranslatorVisitor::FADD_reg(u64) { - ThrowNotImplemented(Opcode::FADD_reg); -} - -void TranslatorVisitor::FADD_cbuf(u64) { - ThrowNotImplemented(Opcode::FADD_cbuf); -} - -void TranslatorVisitor::FADD_imm(u64) { - ThrowNotImplemented(Opcode::FADD_imm); -} - -void TranslatorVisitor::FADD32I(u64) { - ThrowNotImplemented(Opcode::FADD32I); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FFMA_reg(u64) { - ThrowNotImplemented(Opcode::FFMA_reg); -} - -void TranslatorVisitor::FFMA_rc(u64) { - ThrowNotImplemented(Opcode::FFMA_rc); -} - -void TranslatorVisitor::FFMA_cr(u64) { - ThrowNotImplemented(Opcode::FFMA_cr); -} - -void TranslatorVisitor::FFMA_imm(u64) { - ThrowNotImplemented(Opcode::FFMA_imm); -} - -void TranslatorVisitor::FFMA32I(u64) { - ThrowNotImplemented(Opcode::FFMA32I); -} - void TranslatorVisitor::FLO_reg(u64) { ThrowNotImplemented(Opcode::FLO_reg); } @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { ThrowNotImplemented(Opcode::FMNMX_imm); } -void TranslatorVisitor::FMUL_reg(u64) { - ThrowNotImplemented(Opcode::FMUL_reg); -} - -void TranslatorVisitor::FMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::FMUL_cbuf); -} - -void TranslatorVisitor::FMUL_imm(u64) { - ThrowNotImplemented(Opcode::FMUL_imm); -} - -void TranslatorVisitor::FMUL32I(u64) { - ThrowNotImplemented(Opcode::FMUL32I); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD_reg(u64) { - ThrowNotImplemented(Opcode::IADD_reg); -} - -void TranslatorVisitor::IADD_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD_cbuf); -} - -void TranslatorVisitor::IADD_imm(u64) { - ThrowNotImplemented(Opcode::IADD_imm); -} - void 
TranslatorVisitor::IADD3_reg(u64) { ThrowNotImplemented(Opcode::IADD3_reg); } @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::IADD32I(u64) { - ThrowNotImplemented(Opcode::IADD32I); -} - void TranslatorVisitor::ICMP_reg(u64) { ThrowNotImplemented(Opcode::ICMP_reg); } @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISCADD_reg(u64) { - ThrowNotImplemented(Opcode::ISCADD_reg); -} - -void TranslatorVisitor::ISCADD_cbuf(u64) { - ThrowNotImplemented(Opcode::ISCADD_cbuf); -} - -void TranslatorVisitor::ISCADD_imm(u64) { - ThrowNotImplemented(Opcode::ISCADD_imm); -} - -void TranslatorVisitor::ISCADD32I(u64) { - ThrowNotImplemented(Opcode::ISCADD32I); -} - void TranslatorVisitor::ISET_reg(u64) { ThrowNotImplemented(Opcode::ISET_reg); } @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { ThrowNotImplemented(Opcode::ISET_imm); } -void TranslatorVisitor::ISETP_reg(u64) { - ThrowNotImplemented(Opcode::ISETP_reg); -} - -void TranslatorVisitor::ISETP_cbuf(u64) { - ThrowNotImplemented(Opcode::ISETP_cbuf); -} - -void TranslatorVisitor::ISETP_imm(u64) { - ThrowNotImplemented(Opcode::ISETP_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { ThrowNotImplemented(Opcode::LDC); } -void TranslatorVisitor::LDG(u64) { - ThrowNotImplemented(Opcode::LDG); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } -void TranslatorVisitor::S2R(u64) { - ThrowNotImplemented(Opcode::S2R); -} - void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHL_reg(u64) { - ThrowNotImplemented(Opcode::SHL_reg); -} - -void 
TranslatorVisitor::SHL_cbuf(u64) { - ThrowNotImplemented(Opcode::SHL_cbuf); -} - -void TranslatorVisitor::SHL_imm(u64) { - ThrowNotImplemented(Opcode::SHL_imm); -} - void TranslatorVisitor::SHR_reg(u64) { ThrowNotImplemented(Opcode::SHR_reg); } @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { ThrowNotImplemented(Opcode::VSHR); } -void TranslatorVisitor::XMAD_reg(u64) { - ThrowNotImplemented(Opcode::XMAD_reg); -} - -void TranslatorVisitor::XMAD_rc(u64) { - ThrowNotImplemented(Opcode::XMAD_rc); -} - -void TranslatorVisitor::XMAD_cr(u64) { - ThrowNotImplemented(Opcode::XMAD_cr); -} - -void TranslatorVisitor::XMAD_imm(u64) { - ThrowNotImplemented(Opcode::XMAD_imm); -} - } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp deleted file mode 100644 index 7fa35ba3a..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { - throw NotImplementedException("Non-full move mask"); - } -} -} // Anonymous namespace - -void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); -} - -void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); -} - -void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm(insn)); -} - -} // namespace Shader::Maxwell -- cgit v1.2.3 From e81739493a0cacc1efe3295f9d287d5d31b1a989 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 05:58:02 -0300 Subject: shader: Constant propagation and global memory to storage buffer --- src/shader_recompiler/frontend/maxwell/program.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index bd1f96c07..b3f2de852 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -52,9 +52,11 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); + Optimization::Invoke(Optimization::ConstantPropagationPass, function); 
Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); - Optimization::Invoke(Optimization::IdentityRemovalPass, function); - // Optimization::Invoke(Optimization::VerificationPass, function); + Optimization::IdentityRemovalPass(function); + Optimization::VerificationPass(function); } //*/ } -- cgit v1.2.3 From be94ee88d227d0d3dbeabe9ade98bacd910c7a7e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 19:19:36 -0300 Subject: shader: Make typed IR --- .../maxwell/translate/impl/floating_point_add.cpp | 12 +++++------ .../impl/floating_point_conversion_integer.cpp | 20 +++++++++---------- .../impl/floating_point_fused_multiply_add.cpp | 16 +++++++-------- .../impl/floating_point_multi_function.cpp | 6 +++--- .../translate/impl/floating_point_multiply.cpp | 13 ++++++------ .../frontend/maxwell/translate/impl/impl.cpp | 20 +++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.h | 6 ++++++ .../translate/impl/load_store_attribute.cpp | 23 +++++++++++----------- .../maxwell/translate/impl/load_store_memory.cpp | 4 ++-- 9 files changed, 74 insertions(+), 46 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index d2c44b9cc..cb3a326cf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -11,7 +11,7 @@ namespace Shader::Maxwell { namespace { void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, - const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; @@ -24,17 +24,17 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin if 
(cc) { throw NotImplementedException("FADD CC"); } - const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); } -void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { +void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { union { u64 raw; BitField<39, 2, FpRounding> fp_rounding; @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20(insn)); + FADD(*this, insn, GetReg20F(insn)); } void TranslatorVisitor::FADD_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index c4288d9a8..acd8445ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -55,21 +55,21 @@ size_t BitSize(DestFormat dest_format) { } } -void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) { +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; - const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)}; - const IR::U16U32U64 rounded_value{[&] { + const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::F16F32F64 
rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(float_value); + return v.ir.FPRoundEven(op_a); case Rounding::Floor: - return v.ir.FPFloor(float_value); + return v.ir.FPFloor(op_a); case Rounding::Ceil: - return v.ir.FPCeil(float_value); + return v.ir.FPCeil(op_a); case Rounding::Trunc: - return v.ir.FPTrunc(float_value); + return v.ir.FPTrunc(op_a); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } @@ -105,12 +105,12 @@ void TranslatorVisitor::F2I_reg(u64 insn) { BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; - const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 { + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { switch (f2i.base.src_format) { case SrcFormat::F16: - return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half); + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; case SrcFormat::F32: - return X(f2i.src_reg); + return F(f2i.src_reg); case SrcFormat::F64: return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); default: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 30ca052ec..1464f2807 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -9,7 +9,7 @@ namespace Shader::Maxwell { namespace { -void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { union { u64 raw; @@ -23,18 +23,18 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const 
IR::U32& s if (cc) { throw NotImplementedException("FFMA CC"); } - const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; - const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); } -void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { union { u64 raw; BitField<47, 1, u64> cc; @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); + FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); + FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index e2ab0dab2..90cddb18b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -35,8 +35,8 @@ void TranslatorVisitor::MUFU(u64 insn) { BitField<50, 1, u64> sat; } const 
mufu{insn}; - const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; - IR::U32 value{[&]() -> IR::U32 { + const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: return ir.FPCosNotReduced(op_a); @@ -65,7 +65,7 @@ void TranslatorVisitor::MUFU(u64 insn) { value = ir.FPSaturate(value); } - X(mufu.dest_reg, value); + F(mufu.dest_reg, value); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 743a1e2f0..1b1d38be7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -4,6 +4,7 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" @@ -43,7 +44,7 @@ float ScaleFactor(Scale scale) { throw NotImplementedException("Invalid FMUL scale {}", scale); } -void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { union { u64 raw; @@ -57,23 +58,23 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode if (sat) { throw NotImplementedException("FMUL SAT"); } - IR::U32 op_a{v.X(fmul.src_a)}; + IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); } op_a = 
v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); } - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); } -void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { union { u64 raw; BitField<39, 2, FpRounding> fp_rounding; @@ -90,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20(insn)); + return FMUL(*this, insn, GetReg20F(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 548c7f611..3c9eaddd9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -12,10 +12,18 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::F32 TranslatorVisitor::F(IR::Reg reg) { + return ir.BitCast(X(reg)); +} + void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { + X(dest_reg, ir.BitCast(value)); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; @@ -32,6 +40,14 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { + return ir.BitCast(GetReg20(insn)); +} + +IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { + return ir.BitCast(GetReg39(insn)); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -49,6 
+65,10 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } +IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { + return ir.BitCast(GetCbuf(insn)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ef6d977fe..b701605d7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -296,12 +296,18 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::F32 F(IR::Reg reg); + void X(IR::Reg dest_reg, const IR::U32& value); + void F(IR::Reg dest_reg, const IR::F32& value); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetReg20F(u64 insn); + [[nodiscard]] IR::F32 GetReg39F(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); + [[nodiscard]] IR::F32 GetCbufF(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 23512db1a..de65173e8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -5,22 +5,23 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { enum class InterpolationMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, + Pass, + Multiply, + Constant, + Sc, }; enum class SampleMode : u64 { - Default = 0, - Centroid 
= 1, - Offset = 2, + Default, + Centroid, + Offset, }; } // Anonymous namespace @@ -54,12 +55,12 @@ void TranslatorVisitor::IPA(u64 insn) { } const IR::Attribute attribute{ipa.attribute}; - IR::U32 value{ir.GetAttribute(attribute)}; + IR::F32 value{ir.GetAttribute(attribute)}; if (IR::IsGeneric(attribute)) { // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; const bool is_perspective{false}; if (is_perspective) { - const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; + const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; value = ir.FPMul(value, rcp_position_w); } } @@ -68,7 +69,7 @@ void TranslatorVisitor::IPA(u64 insn) { case InterpolationMode::Pass: break; case InterpolationMode::Multiply: - value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); + value = ir.FPMul(value, F(ipa.multiplier)); break; case InterpolationMode::Constant: throw NotImplementedException("IPA.CONSTANT"); @@ -86,7 +87,7 @@ void TranslatorVisitor::IPA(u64 insn) { value = ir.FPSaturate(value); } - ir.SetReg(ipa.dest_reg, value); + F(ipa.dest_reg, value); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index c9669c617..9f1570479 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, ir.CompositeExtract(vector, i)); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } break; } @@ -124,7 +124,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, ir.CompositeExtract(vector, i)); + 
X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } break; } -- cgit v1.2.3 From 16cb00c521cae6e93ec49d10e15b575b7bc4857e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 23:11:23 -0300 Subject: shader: Add pools and rename files --- .../frontend/maxwell/control_flow.h | 2 +- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- src/shader_recompiler/frontend/maxwell/decode.h | 2 +- src/shader_recompiler/frontend/maxwell/opcode.cpp | 26 ------------ src/shader_recompiler/frontend/maxwell/opcode.h | 30 ------------- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 26 ++++++++++++ src/shader_recompiler/frontend/maxwell/opcodes.h | 30 +++++++++++++ src/shader_recompiler/frontend/maxwell/program.cpp | 49 ++++++++-------------- src/shader_recompiler/frontend/maxwell/program.h | 22 +++------- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_multi_function.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 2 +- .../maxwell/translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/move_register.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 5 ++- .../frontend/maxwell/translate/translate.h | 7 +++- 17 files changed, 95 insertions(+), 118 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/opcode.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/opcode.h create mode 100644 src/shader_recompiler/frontend/maxwell/opcodes.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/opcodes.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 20ada8afd..49b369282 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -16,7 +16,7 @@ #include "shader_recompiler/frontend/ir/condition.h" #include 
"shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell::Flow { diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index ab1cc6c8d..bd85afa1e 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/decode.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { namespace { diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h index 2a3dd28e8..b4f080fd7 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.h +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -5,7 +5,7 @@ #pragma once #include "common/common_types.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcode.cpp deleted file mode 100644 index 8a7bdb611..000000000 --- a/src/shader_recompiler/frontend/maxwell/opcode.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" - -namespace Shader::Maxwell { -namespace { -constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, -#include "maxwell.inc" -#undef INST -}; -} // Anonymous namespace - -const char* NameOf(Opcode opcode) { - if (static_cast(opcode) >= NAME_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", static_cast(opcode)); - } - return NAME_TABLE[static_cast(opcode)]; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcode.h deleted file mode 100644 index cd574f29d..000000000 --- a/src/shader_recompiler/frontend/maxwell/opcode.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -namespace Shader::Maxwell { - -enum class Opcode { -#define INST(name, cute, encode) name, -#include "maxwell.inc" -#undef INST -}; - -const char* NameOf(Opcode opcode); - -} // namespace Shader::Maxwell - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - template - auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { - return format_to(ctx.out(), "{}", NameOf(opcode)); - } -}; diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 000000000..12ddf2ac9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) #cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast(opcode)); + } + return NAME_TABLE[static_cast(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b3f2de852..8cdd20804 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -5,6 +5,7 @@ #include #include +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -12,17 +13,18 @@ 
namespace Shader::Maxwell { namespace { -void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span block_map, IR::Block* block_memory) { +void TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span block_map) { const size_t num_blocks{cfg_function.blocks.size()}; function.blocks.reserve(num_blocks); for (const Flow::BlockId block_id : cfg_function.blocks) { const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); - block_map[flow_block.id] = function.blocks.back().get(); - ++block_memory; + IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; + block_map[flow_block.id] = ir_block; + function.blocks.emplace_back(ir_block); } } @@ -34,21 +36,24 @@ void EmitTerminationInsts(const Flow::Function& cfg_function, } } -void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - IR::Block* block_memory) { +void TranslateFunction(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::Function& cfg_function, + IR::Function& function) { std::vector block_map; block_map.resize(cfg_function.blocks_data.size()); - TranslateCode(env, cfg_function, function, block_map, block_memory); + TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); EmitTerminationInsts(cfg_function, block_map); } } // Anonymous namespace -Program::Program(Environment& env, const Flow::CFG& cfg) { +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::CFG& cfg) { + IR::Program program; + auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(env, cfg_function, functions.emplace_back(), 
- block_alloc_pool.allocate(cfg_function.blocks.size())); + TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { @@ -59,27 +64,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::VerificationPass(function); } //*/ -} - -std::string DumpProgram(const Program& program) { - size_t index{0}; - std::map inst_to_index; - std::map block_to_index; - - for (const IR::Function& function : program.functions) { - for (const auto& block : function.blocks) { - block_to_index.emplace(block.get(), index); - ++index; - } - } - std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } - } - return ret; + return program; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 36e678a9e..3355ab129 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -9,28 +9,16 @@ #include #include -#include #include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -class Program { - friend std::string DumpProgram(const Program& program); - -public: - explicit Program(Environment& env, const Flow::CFG& cfg); - -private: - boost::pool_allocator - block_alloc_pool; - boost::container::small_vector functions; -}; - -[[nodiscard]] std::string DumpProgram(const Program& program); +[[nodiscard]] IR::Program 
TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + const Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index acd8445ad..3d0c48457 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index 90cddb18b..ba005fbf4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index de65173e8..ad97786d4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -6,7 +6,7 
@@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 9f1570479..727524284 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1711d3f48..1f83d1068 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d70399f6b..1bb160acb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 66a306745..dcc3f6c0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,8 +23,9 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(Environment& env, const Flow::Block& flow_block) { - IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()}; +IR::Block Translate(ObjectPool& inst_pool, Environment& env, + const Flow::Block& flow_block) { + IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; TranslatorVisitor visitor{env, block}; const Location pc_end{flow_block.end}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index 788742dea..c1c21b278 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,11 +6,14 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block); 
+[[nodiscard]] IR::Block Translate(ObjectPool& inst_pool, Environment& env, + const Flow::Block& flow_block); } // namespace Shader::Maxwell -- cgit v1.2.3 From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- src/shader_recompiler/frontend/maxwell/translate/translate.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index dcc3f6c0e..7e6bb07a2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -11,15 +11,15 @@ namespace Shader::Maxwell { -template +template static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { - using MethodType = decltype(visitor_method); + using MethodType = decltype(method); if constexpr (std::is_invocable_r_v) { - (visitor.*visitor_method)(pc, insn); + (visitor.*method)(pc, insn); } else if constexpr (std::is_invocable_r_v) { - (visitor.*visitor_method)(insn); + (visitor.*method)(insn); } else { - (visitor.*visitor_method)(); + (visitor.*method)(); } } -- cgit v1.2.3 From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 11 Feb 2021 16:39:06 -0300 Subject: shader: Initial implementation of an AST --- .../frontend/maxwell/control_flow.cpp | 426 ++++++++------------- .../frontend/maxwell/control_flow.h | 77 ++-- src/shader_recompiler/frontend/maxwell/location.h | 12 +- src/shader_recompiler/frontend/maxwell/program.cpp | 69 ++-- src/shader_recompiler/frontend/maxwell/program.h | 2 +- .../frontend/maxwell/termination_code.cpp | 86 ----- .../frontend/maxwell/termination_code.h | 17 - .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- 
.../frontend/maxwell/translate/translate.cpp | 17 +- .../frontend/maxwell/translate/translate.h | 7 +- 10 files changed, 238 insertions(+), 477 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 21ee98137..e766b555b 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -17,38 +17,49 @@ #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; +} // Anonymous namespace static u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static std::array Split(Block&& block, Location pc, BlockId new_id) { - if (pc <= block.begin || pc >= block.end) { +static void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - return { - Block{ - .begin{block.begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{block.id}, - .stack{block.stack}, - .cond{true}, - .branch_true{new_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }, - Block{ - .begin{pc}, - .end{block.end}, - .end_class{block.end_class}, - .id{new_id}, - .stack{std::move(block.stack)}, - .cond{block.cond}, - .branch_true{block.branch_true}, - .branch_false{block.branch_false}, - 
.imm_predecessors{}, - }, + *new_block = Block{ + .begin{pc}, + .end{old_block->end}, + .end_class{old_block->end_class}, + .stack{old_block->stack}, + .cond{old_block->cond}, + .branch_true{old_block->branch_true}, + .branch_false{old_block->branch_false}, + .ir{nullptr}, + }; + *old_block = Block{ + .begin{old_block->begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{std::move(old_block->stack)}, + .cond{IR::Condition{true}}, + .branch_true{new_block}, + .branch_false{nullptr}, + .ir{nullptr}, }; } @@ -112,7 +123,7 @@ static bool HasFlowTest(Opcode opcode) { static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { - return fmt::format("\"Virtual {}\"", block.id); + return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } @@ -158,126 +169,23 @@ bool Block::Contains(Location pc) const noexcept { Function::Function(Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block_id{0}, + .block{nullptr}, .stack{}, }} {} -void Function::BuildBlocksMap() { - const size_t num_blocks{NumBlocks()}; - blocks_map.resize(num_blocks); - for (size_t block_index = 0; block_index < num_blocks; ++block_index) { - Block& block{blocks_data[block_index]}; - blocks_map[block.id] = █ - } -} - -void Function::BuildImmediatePredecessors() { - for (const Block& block : blocks_data) { - if (block.branch_true != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); - } - if (block.branch_false != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); - } - } -} - -void Function::BuildPostOrder() { - boost::container::small_vector block_stack; - post_order_map.resize(NumBlocks()); - - Block& first_block{blocks_data[blocks.front()]}; - first_block.post_order_visited = true; - block_stack.push_back(first_block.id); - - const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { - if (branch_id 
== UNREACHABLE_BLOCK_ID) { - return false; - } - if (blocks_map[branch_id]->post_order_visited) { - return false; - } - blocks_map[branch_id]->post_order_visited = true; - - // Calling push_back twice is faster than insert on msvc - block_stack.push_back(block_id); - block_stack.push_back(branch_id); - return true; - }; - while (!block_stack.empty()) { - const Block* const block{blocks_map[block_stack.back()]}; - block_stack.pop_back(); - - if (!visit_branch(block->id, block->branch_true) && - !visit_branch(block->id, block->branch_false)) { - post_order_map[block->id] = static_cast(post_order_blocks.size()); - post_order_blocks.push_back(block->id); - } - } -} - -void Function::BuildImmediateDominators() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; - auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; - auto intersect{[this](Block* finger1, Block* finger2) { - while (finger1 != finger2) { - while (post_order_map[finger1->id] < post_order_map[finger2->id]) { - finger1 = finger1->imm_dominator; - } - while (post_order_map[finger2->id] < post_order_map[finger1->id]) { - finger2 = finger2->imm_dominator; - } - } - return finger1; - }}; - for (Block& block : blocks_data) { - block.imm_dominator = nullptr; - } - Block* const start_block{&blocks_data[blocks.front()]}; - start_block->imm_dominator = start_block; - - bool changed{true}; - while (changed) { - changed = false; - for (Block* const block : post_order_blocks | reverse_order_but_first) { - Block* new_idom{}; - for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { - new_idom = new_idom ? 
intersect(predecessor, new_idom) : predecessor; - } - changed |= block->imm_dominator != new_idom; - block->imm_dominator = new_idom; - } - } -} - -void Function::BuildDominanceFrontier() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; - for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { - for (Block* current : block.imm_predecessors | transform_block_id) { - while (current != block.imm_dominator) { - current->dominance_frontiers.push_back(current->id); - current = current->imm_dominator; - } - } - } -} - -CFG::CFG(Environment& env_, Location start_address) : env{env_} { - VisitFunctions(start_address); - - for (Function& function : functions) { - function.BuildBlocksMap(); - function.BuildImmediatePredecessors(); - function.BuildPostOrder(); - function.BuildImmediateDominators(); - function.BuildDominanceFrontier(); - } -} - -void CFG::VisitFunctions(Location start_address) { +CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) + : env{env_}, block_pool{block_pool_} { functions.emplace_back(start_address); + functions.back().labels.back().block = block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + }); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -294,35 +202,16 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { return; } // Try to find the next block - Function* function{&functions[function_id]}; + Function* const function{&functions[function_id]}; Location pc{label.address}; - const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), 
pc, - [function](Location pc, u32 block_index) { - return pc < function->blocks_data[block_index].begin; - })}; - const auto next_index{std::distance(function->blocks.begin(), next)}; - const bool is_last{next == function->blocks.end()}; - Location next_pc; - BlockId next_id{UNREACHABLE_BLOCK_ID}; - if (!is_last) { - next_pc = function->blocks_data[*next].begin; - next_id = function->blocks_data[*next].id; - } + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? nullptr : &*next_it}; // Insert before the next block - Block block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{label.block_id}, - .stack{std::move(label.stack)}, - .cond{true}, - .branch_true{UNREACHABLE_BLOCK_ID}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }; + Block* const block{label.block}; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; - while (is_last || pc < next_pc) { + while (!next || pc < next->begin) { is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; if (is_branch) { break; @@ -332,43 +221,36 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { if (!is_branch) { // If the block finished without a branch, // it means that the next instruction is already visited, jump to it - block.end = pc; - block.cond = true; - block.branch_true = next_id; - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; } // Function's pointer might be invalid, resolve it again - function = &functions[function_id]; - const u32 new_block_index = static_cast(function->blocks_data.size()); - function->blocks.insert(function->blocks.begin() + next_index, new_block_index); - function->blocks_data.push_back(std::move(block)); + // Insert the new block + 
functions[function_id].blocks.insert(*block); } bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { const Location pc{label.address}; Function& function{functions[function_id]}; - const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { - return function.blocks_data[block_index].Contains(pc); - })}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; if (it == function.blocks.end()) { // Address has not been visited return false; } - Block& block{function.blocks_data[*it]}; - if (block.begin == pc) { - throw LogicError("Dangling branch"); - } - const u32 first_index{*it}; - const u32 second_index{static_cast(function.blocks_data.size())}; - const std::array new_indices{first_index, second_index}; - std::array split_blocks{Split(std::move(block), pc, label.block_id)}; - function.blocks_data[*it] = std::move(split_blocks[0]); - function.blocks_data.push_back(std::move(split_blocks[1])); - function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); + } + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); return true; } -CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { const Instruction inst{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(inst.raw)}; switch (opcode) { @@ -390,12 +272,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); break; case Opcode::RET: - block.end_class = EndClass::Return; + block->end_class = EndClass::Return; break; default: break; } - block.end = pc; + block->end = pc; return AnalysisState::Branch; 
case Opcode::BRK: case Opcode::CONT: @@ -404,9 +286,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; } - const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; - block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); - block.end = pc; + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; return AnalysisState::Branch; } case Opcode::PBK: @@ -414,7 +296,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati case Opcode::PEXIT: case Opcode::PLONGJMP: case Opcode::SSY: - block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); @@ -444,51 +326,51 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati return AnalysisState::Branch; } -void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond) { - if (block.begin != pc) { + if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later - block.end = pc; - block.cond = true; - block.branch_true = AddLabel(block, block.stack, pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; return; } - // Impersonate the visited block with a virtual block - // Jump from this virtual to the real conditional instruction and the next instruction - Function& 
function{functions[function_id]}; - const BlockId conditional_block_id{++function.current_block_id}; - function.blocks.push_back(static_cast(function.blocks_data.size())); - Block& virtual_block{function.blocks_data.emplace_back(Block{ - .begin{}, // Virtual block - .end{}, + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{ + .begin{block->begin.Virtual()}, + .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .id{block.id}, // Impersonating - .stack{block.stack}, + .stack{block->stack}, .cond{cond}, - .branch_true{conditional_block_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - })}; - // Set the end properties of the conditional instruction and give it a new identity - Block& conditional_block{block}; - conditional_block.end = pc; - conditional_block.end_class = insn_end_class; - conditional_block.id = conditional_block_id; + .branch_true{conditional_block}, + .branch_false{nullptr}, + .ir{nullptr}, + }; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc; + conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction - const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block - virtual_block.branch_false = endif_block_id; + block->branch_false = endif_block; // And branch to it from the conditional instruction if it is a branch if (insn_end_class == EndClass::Branch) { - conditional_block.cond = true; - conditional_block.branch_true = endif_block_id; - conditional_block.branch_false = 
UNREACHABLE_BLOCK_ID; + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); } -bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode) { if (inst.branch.is_cbuf) { throw NotImplementedException("Branch with constant buffer offset"); @@ -500,21 +382,21 @@ bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instr const bool has_flow_test{HasFlowTest(opcode)}; const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - block.cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); - block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + block->cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); } else { - block.cond = true; + block->cond = IR::Condition{true}; } return true; } -void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute) { const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { +void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? 
"JMX" : "BRX"); } @@ -528,7 +410,7 @@ void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { } } -CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; const Predicate pred{inst.Pred()}; @@ -537,41 +419,52 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Locati return AnalysisState::Continue; } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - if (block.stack.Peek(Token::PEXIT).has_value()) { + if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } - if (const std::optional exit_pc{block.stack.Peek(Token::PEXIT)}) { - const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; - block.cond = true; - block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; return AnalysisState::Branch; } - block.end = pc; - block.end_class = EndClass::Exit; + block->end = pc; + block->end_class = EndClass::Exit; return AnalysisState::Branch; } -BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { Function& function{functions[function_id]}; - if (block.begin == pc) { - return block.id; + if (block->begin == pc) { + // Jumps to itself + 
return block; } - const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; - if (target != function.blocks_data.end()) { - return target->id; + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + return &*it; } - const BlockId block_id{++function.current_block_id}; + // TODO: FIX DANGLING BLOCKS + Block* const new_block{block_pool.Create(Block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{stack}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + })}; function.labels.push_back(Label{ .address{pc}, - .block_id{block_id}, + .block{new_block}, .stack{std::move(stack)}, }); - return block_id; + return new_block; } std::string CFG::Dot() const { @@ -581,18 +474,12 @@ std::string CFG::Dot() const { for (const Function& function : functions) { dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); dot += fmt::format("\t\tnode [style=filled];\n"); - for (const u32 block_index : function.blocks) { - const Block& block{function.blocks_data[block_index]}; + for (const Block& block : function.blocks) { const std::string name{NameOf(block)}; - const auto add_branch = [&](BlockId branch_id, bool add_label) { - const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; - dot += fmt::format("\t\t{}->", name); - if (it == function.blocks_data.end()) { - dot += fmt::format("\"Unknown label {}\"", branch_id); - } else { - dot += NameOf(*it); - }; - if (add_label && block.cond != true && block.cond != false) { + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { dot += fmt::format(" [label=\"{}\"]", block.cond); } dot += '\n'; @@ -600,10 +487,10 @@ std::string CFG::Dot() const { dot += fmt::format("\t\t{};\n", 
name); switch (block.end_class) { case EndClass::Branch: - if (block.cond != false) { + if (block.cond != IR::Condition{false}) { add_branch(block.branch_true, true); } - if (block.cond != true) { + if (block.cond != IR::Condition{true}) { add_branch(block.branch_false, false); } break; @@ -619,12 +506,6 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; - case EndClass::Unreachable: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += fmt::format( - "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } if (function.entrypoint == 8) { @@ -635,10 +516,11 @@ std::string CFG::Dot() const { dot += "\t}\n"; } if (!functions.empty()) { - if (functions.front().blocks.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 49b369282..8179787b8 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -11,25 +11,27 @@ #include #include +#include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +class Block; +} namespace Shader::Maxwell::Flow { -using BlockId = u32; using FunctionId = size_t; -constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast(-1)}; - enum class EndClass { Branch, Exit, Return, - Unreachable, }; enum class Token { @@ -59,58 +61,37 @@ 
private: boost::container::small_vector entries; }; -struct Block { +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode> { [[nodiscard]] bool Contains(Location pc) const noexcept; + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + Location begin; Location end; EndClass end_class; - BlockId id; Stack stack; IR::Condition cond; - BlockId branch_true; - BlockId branch_false; - boost::container::small_vector imm_predecessors; - boost::container::small_vector dominance_frontiers; - union { - bool post_order_visited{false}; - Block* imm_dominator; - }; + Block* branch_true; + Block* branch_false; + IR::Block* ir; }; struct Label { Location address; - BlockId block_id; + Block* block; Stack stack; }; struct Function { Function(Location start_address); - void BuildBlocksMap(); - - void BuildImmediatePredecessors(); - - void BuildPostOrder(); - - void BuildImmediateDominators(); - - void BuildDominanceFrontier(); - - [[nodiscard]] size_t NumBlocks() const noexcept { - return static_cast(current_block_id) + 1; - } - Location entrypoint; - BlockId current_block_id{0}; boost::container::small_vector labels; - boost::container::small_vector blocks; - boost::container::small_vector blocks_data; - // Translates from BlockId to block index - boost::container::small_vector blocks_map; - - boost::container::small_vector post_order_blocks; - boost::container::small_vector post_order_map; + boost::intrusive::set blocks; }; class CFG { @@ -120,7 +101,7 @@ class CFG { }; public: - explicit CFG(Environment& env, Location start_address); + explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -133,35 +114,37 @@ public: [[nodiscard]] std::span Functions() const noexcept { return std::span(functions.data(), functions.size()); } + [[nodiscard]] std::span Functions() noexcept { + return 
std::span(functions.data(), functions.size()); + } private: - void VisitFunctions(Location start_address); - void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); - void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode); - void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); - AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); Environment& env; + ObjectPool& block_pool; boost::container::small_vector functions; FunctionId current_function_id{0}; }; diff --git a/src/shader_recompiler/frontend/maxwell/location.h 
b/src/shader_recompiler/frontend/maxwell/location.h index 66b51a19e..26d29eae2 100644 --- a/src/shader_recompiler/frontend/maxwell/location.h +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -15,7 +15,7 @@ namespace Shader::Maxwell { class Location { - static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits::max()}; + static constexpr u32 VIRTUAL_BIAS{4}; public: constexpr Location() = default; @@ -27,12 +27,18 @@ public: Align(); } + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + [[nodiscard]] constexpr u32 Offset() const noexcept { return offset; } [[nodiscard]] constexpr bool IsVirtual() const { - return offset == VIRTUAL_OFFSET; + return offset % 8 == VIRTUAL_BIAS; } constexpr auto operator<=>(const Location&) const noexcept = default; @@ -89,7 +95,7 @@ private: offset -= 8 + (offset % 32 == 8 ? 8 : 0); } - u32 offset{VIRTUAL_OFFSET}; + u32 offset{0xcccccccc}; }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8cdd20804..9fa912ed8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,57 +4,58 @@ #include #include +#include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { namespace { -void TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span block_map) { +IR::BlockList TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, + 
Environment& env, Flow::Function& cfg_function) { const size_t num_blocks{cfg_function.blocks.size()}; - function.blocks.reserve(num_blocks); - - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - - IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; - block_map[flow_block.id] = ir_block; - function.blocks.emplace_back(ir_block); - } -} - -void EmitTerminationInsts(const Flow::Function& cfg_function, - std::span block_map) { - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } -} - -void TranslateFunction(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::Function& cfg_function, - IR::Function& function) { - std::vector block_map; - block_map.resize(cfg_function.blocks_data.size()); - - TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); - EmitTerminationInsts(cfg_function, block_map); + std::vector blocks(cfg_function.blocks.size()); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + const u32 begin{cfg_block.begin.Offset()}; + const u32 end{cfg_block.end.Offset()}; + blocks[i] = block_pool.Create(inst_pool, begin, end); + cfg_block.ir = blocks[i]; + ++i; + }); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + IR::Block* const block{blocks[i]}; + ++i; + if (cfg_block.end_class != Flow::EndClass::Branch) { + block->SetReturn(); + } else if (cfg_block.cond == IR::Condition{true}) { + block->SetBranch(cfg_block.branch_true->ir); + } else if (cfg_block.cond == IR::Condition{false}) { + block->SetBranch(cfg_block.branch_false->ir); + } else { + block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, + cfg_block.branch_false->ir); + } + }); + return IR::VisitAST(inst_pool, block_pool, blocks, 
+ [&](IR::Block* block) { Translate(env, block); }); } } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg) { IR::Program program; auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); - for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); + for (Flow::Function& cfg_function : cfg.Functions()) { + functions.push_back(IR::Function{ + .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, + }); } + + fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program)); std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 3355ab129..542621a1d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -19,6 +19,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - const Flow::CFG& cfg); + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp deleted file mode 100644 index ed5137f20..000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" - -namespace Shader::Maxwell { - -static void EmitExit(IR::IREmitter& ir) { - ir.Exit(); -} - -static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) { - switch (flow_test) { - case IR::FlowTest::T: - return ir.Imm1(true); - case IR::FlowTest::F: - return ir.Imm1(false); - case IR::FlowTest::NE: - // FIXME: Verify this - return ir.LogicalNot(ir.GetZFlag()); - case IR::FlowTest::NaN: - // FIXME: Verify this - return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); - default: - throw NotImplementedException("Flow test {}", flow_test); - } -} - -static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { - const IR::FlowTest flow_test{cond.FlowTest()}; - const auto [pred, pred_negated]{cond.Pred()}; - if (pred == IR::Pred::PT && !pred_negated) { - return GetFlowTest(flow_test, ir); - } - if (flow_test == IR::FlowTest::T) { - return ir.GetPred(pred, pred_negated); - } - return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir)); -} - -static void EmitBranch(const Flow::Block& flow_block, std::span block_map, - IR::IREmitter& ir) { - const auto add_immediate_predecessor = [&](Flow::BlockId label) { - block_map[label]->AddImmediatePredecessor(&ir.block); - }; - if (flow_block.cond == true) { - add_immediate_predecessor(flow_block.branch_true); - return ir.Branch(block_map[flow_block.branch_true]); - } - if (flow_block.cond == false) { - add_immediate_predecessor(flow_block.branch_false); - return ir.Branch(block_map[flow_block.branch_false]); - } - add_immediate_predecessor(flow_block.branch_true); - add_immediate_predecessor(flow_block.branch_false); - return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], - 
block_map[flow_block.branch_false]); -} - -void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map) { - IR::Block* const block{block_map[flow_block.id]}; - IR::IREmitter ir(*block); - switch (flow_block.end_class) { - case Flow::EndClass::Branch: - EmitBranch(flow_block, block_map, ir); - break; - case Flow::EndClass::Exit: - EmitExit(ir); - break; - case Flow::EndClass::Return: - ir.Return(); - break; - case Flow::EndClass::Unreachable: - ir.Unreachable(); - break; - } -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h deleted file mode 100644 index 04e044534..000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" - -namespace Shader::Maxwell { - -/// Emit termination instructions and collect immediate predecessors -void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d4b417d14..b752785d4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -28,7 +28,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { IR::U32 result; if (shl.w != 0) { // When .W is set, the shift value is wrapped - // To emulate this we just have to clamp it ourselves. + // To emulate this we just have to wrap it ourselves. 
const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; result = v.ir.ShiftLeftLogical(base, shift); } else { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 7e6bb07a2..f1230f58f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,14 +23,13 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(ObjectPool& inst_pool, Environment& env, - const Flow::Block& flow_block) { - IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; - TranslatorVisitor visitor{env, block}; - - const Location pc_end{flow_block.end}; - Location pc{flow_block.begin}; - while (pc != pc_end) { +void Translate(Environment& env, IR::Block* block) { + if (block->IsVirtual()) { + return; + } + TranslatorVisitor visitor{env, *block}; + const Location pc_end{block->LocationEnd()}; + for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { @@ -43,9 +42,7 @@ IR::Block Translate(ObjectPool& inst_pool, Environment& env, default: throw LogicError("Invalid opcode {}", opcode); } - ++pc; } - return block; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index c1c21b278..e1aa2e0f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,14 +6,9 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include 
"shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(ObjectPool& inst_pool, Environment& env, - const Flow::Block& flow_block); +void Translate(Environment& env, IR::Block* block); } // namespace Shader::Maxwell -- cgit v1.2.3 From 8af9297f0972d0aaa8306369c5d04926b886a89e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 01:24:32 -0300 Subject: shader: Misc fixes --- src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | 4 ++-- .../frontend/maxwell/translate/impl/integer_set_predicate.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 60f79b160..623e78ff8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -76,8 +76,8 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } // Anonymous namespace -void TranslatorVisitor::IADD_reg(u64) { - throw NotImplementedException("IADD (reg)"); +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::IADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 76c6b5291..1bc9ef363 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -92,8 +92,8 @@ void TranslatorVisitor::ISETP_cbuf(u64 insn) { ISETP(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::ISETP_imm(u64) { - throw NotImplementedException("ISETP_imm"); +void 
TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From cbfb7d182a4e90e4e263696d1fca35e47d3eabb4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 20:15:42 -0300 Subject: shader: Support SSA loops on IR --- src/shader_recompiler/frontend/maxwell/program.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 9fa912ed8..dab6d68c0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -56,11 +57,14 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Tue, 16 Feb 2021 04:10:22 -0300 Subject: spirv: Initial bindings support --- src/shader_recompiler/frontend/maxwell/program.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dab6d68c0..8331d576c 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -53,21 +53,22 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Tue, 16 Feb 2021 19:49:45 -0300 Subject: shader: Simplify ISCADD --- .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index f92c0bbd6..f06046d4d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -50,12 +50,7 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } // Anonymous namespace void TranslatorVisitor::ISCADD_reg(u64 insn) { - union { - u64 raw; - BitField<20, 8, IR::Reg> op_b; - } const iscadd{insn}; - - ISCADD(*this, insn, X(iscadd.op_b)); + ISCADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::ISCADD_cbuf(u64) { -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +++++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 35 +++++++++++----------- .../maxwell/translate/impl/not_implemented.cpp | 4 --- 5 files changed, 27 insertions(+), 23 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c..8c44ebb29 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h 
b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d7..8bd468244 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d1068..c3c4b9abd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? 
mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acb..6b2a1356b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../impl/floating_point_conversion_integer.cpp | 62 ++++++++++++++++------ .../frontend/maxwell/translate/impl/impl.h | 2 +- 3 files changed, 50 insertions(+), 16 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8c44ebb29..16cdc12e2 100644 --- 
a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool dest_format; BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; - BitField<39, 1, Rounding> rounding; + BitField<39, 2, Rounding> rounding; BitField<49, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; @@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) { } } +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; @@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { const size_t bitsize{BitSize(f2i.dest_format)}; const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; - v.X(f2i.dest_reg, result); + if (bitsize == 64) { + const IR::Value vector{v.ir.UnpackUint2x32(result)}; + v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); + v.X(f2i.dest_reg + 1, 
IR::U32{v.ir.CompositeExtract(vector, 1)}); + } else { + v.X(f2i.dest_reg, result); + } if (f2i.cc != 0) { - v.SetZFlag(v.ir.GetZeroFromOp(result)); - if (is_signed) { - v.SetSFlag(v.ir.GetSignFromOp(result)); - } else { - v.ResetSFlag(); - } - v.ResetCFlag(); - - // TODO: Investigate if out of bound conversions sets the overflow flag - v.ResetOFlag(); + throw NotImplementedException("F2I CC"); } } } // Anonymous namespace @@ -118,12 +137,25 @@ void TranslatorVisitor::F2I_reg(u64 insn) { f2i.base.src_format.Value()); } }()}; - TranslateF2I(*this, insn, op_a); } -void TranslatorVisitor::F2I_cbuf(u64) { - throw NotImplementedException("{}", Opcode::F2I_cbuf); +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetCbufF(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); } void TranslatorVisitor::F2I_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8bd468244..27aba2cf8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { class TranslatorVisitor { public: - explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} Environment& env; IR::IREmitter ir; -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm 
flush support --- .../impl/floating_point_conversion_integer.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } -- cgit v1.2.3 From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: shader: Rename, implement FADD.SAT and P2R (imm) --- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../maxwell/translate/impl/floating_point_add.cpp | 20 +++---- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +- .../translate/impl/floating_point_multiply.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 17 +++++- .../frontend/maxwell/translate/impl/impl.h | 7 ++- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../translate/impl/move_predicate_to_register.cpp | 66 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 10 files changed, 101 insertions(+), 35 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 16cdc12e2..ed5dbf41f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool src_a; } const fadd{insn}; - if (sat) { - throw NotImplementedException("FADD SAT"); - } if (cc) { throw NotImplementedException("FADD CC"); } @@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); } void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20F(insn)); + FADD(*this, insn, GetRegFloat20(insn)); } -void TranslatorVisitor::FADD_cbuf(u64) { - throw NotImplementedException("FADD (cbuf)"); +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FADD_imm(u64) { - throw NotImplementedException("FADD (imm)"); +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); } void TranslatorVisitor::FADD32I(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 4d82a0009..81175627f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) { case SrcFormat::F16: return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; case SrcFormat::F32: - return 
GetCbufF(insn); + return GetFloatCbuf(insn); case SrcFormat::F64: { return UnpackCbuf(*this, insn); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 1464f2807..758700d3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); + FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 1b1d38be7..5c38d3fc1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20F(insn)); + return FMUL(*this, insn, GetRegFloat20(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 079e3497f..be17bb0d9 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { return ir.BitCast(GetCbuf(insn)); } @@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { return ir.Imm32(value); } +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; + const f32 value{imm.is_negative != 0 ? 
-positive_value : positive_value}; + return ir.Imm32(value); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 27aba2cf8..4d4cf2ebf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,13 +304,14 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetReg20F(u64 insn); - [[nodiscard]] IR::F32 GetReg39F(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::F32 GetCbufF(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 623e78ff8..1493e1815 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) { IADD(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::IADD_imm(u64) { - throw NotImplementedException("IADD (imm)"); +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::IADD32I(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 
7U : 4U}; + const u32 offset{static_cast(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6b2a1356b..628cf1c14 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) { ThrowNotImplemented(Opcode::OUT_imm); } -void TranslatorVisitor::P2R_reg(u64) { - ThrowNotImplemented(Opcode::P2R_reg); -} - -void TranslatorVisitor::P2R_cbuf(u64) { - ThrowNotImplemented(Opcode::P2R_cbuf); -} - -void TranslatorVisitor::P2R_imm(u64) { - ThrowNotImplemented(Opcode::P2R_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } -- cgit v1.2.3 From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 Subject: spirv: Fixes and Intel specific workarounds --- src/shader_recompiler/frontend/maxwell/program.cpp | 3 --- .../frontend/maxwell/translate/impl/impl.cpp | 15 +++++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 
'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ed5dbf41f..dbfc04f75 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,7 +56,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const s32 positive_value{static_cast(imm.value)}; - const s32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + if (imm.is_negative != 0) { + const s64 raw{static_cast(imm.value)}; + return ir.Imm32(static_cast(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast(imm.value)); + } } IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { @@ -94,9 +97,9 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; - const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + const u32 sign_bit{imm.is_negative != 0 ? 
(1ULL << 31) : 0}; + const u32 value{static_cast(imm.value) << 12}; + return ir.Imm32(Common::BitCast(value | sign_bit)); } IR::U32 TranslatorVisitor::GetImm32(u64 insn) { -- cgit v1.2.3 From 18a766b3622baa40596490dbd4912f94e9980a76 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 02:45:50 -0300 Subject: shader: Fix MOV(reg), add SHL variants and emit neg and abs instructions --- .../frontend/maxwell/translate/impl/integer_shift_left.cpp | 8 ++++---- .../frontend/maxwell/translate/impl/move_register.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index b752785d4..d8a5158b5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -56,12 +56,12 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { } } // Anonymous namespace -void TranslatorVisitor::SHL_reg(u64) { - throw NotImplementedException("SHL_reg"); +void TranslatorVisitor::SHL_reg(u64 insn) { + SHL(*this, insn, GetReg20(insn)); } -void TranslatorVisitor::SHL_cbuf(u64) { - throw NotImplementedException("SHL_cbuf"); +void TranslatorVisitor::SHL_cbuf(u64 insn) { + SHL(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::SHL_imm(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index c3c4b9abd..6bb08db8a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -26,7 +26,7 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa } // Anonymous namespace void 
TranslatorVisitor::MOV_reg(u64 insn) { - MOV(*this, insn, GetReg8(insn)); + MOV(*this, insn, GetReg20(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { -- cgit v1.2.3 From e44752ddc8804961eb84f8c225bb36d5b4c77bc1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 22:59:16 -0300 Subject: shader: FMUL, select, RRO, and MUFU fixes --- .../maxwell/translate/impl/common_encoding.h | 3 +- .../maxwell/translate/impl/floating_point_add.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +-- .../impl/floating_point_multi_function.cpp | 8 ++--- .../translate/impl/floating_point_multiply.cpp | 42 +++++++++++++++------- .../impl/floating_point_range_reduction.cpp | 41 +++++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 12 +++++-- .../frontend/maxwell/translate/impl/impl.h | 5 +-- .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 12 ------- 10 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h index 3da37a2bb..fd73f656c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -46,7 +46,8 @@ inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { case FmzMode::FTZ: return IR::FmzMode::FTZ; case FmzMode::FMZ: - return IR::FmzMode::FMZ; + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; case FmzMode::INVALIDFMZ3: break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 219ffcc6a..76a807d4e 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetRegFloat20(insn)); + FADD(*this, insn, GetFloatReg20(insn)); } void TranslatorVisitor::FADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 758700d3c..c2ca0873b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index ba005fbf4..2f8605619 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { -enum class Operation { +enum class Operation 
: u64 { Cos = 0, Sin = 1, Ex2 = 2, // Base 2 exponent @@ -39,11 +39,11 @@ void TranslatorVisitor::MUFU(u64 insn) { IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: - return ir.FPCosNotReduced(op_a); + return ir.FPCos(op_a); case Operation::Sin: - return ir.FPSinNotReduced(op_a); + return ir.FPSin(op_a); case Operation::Ex2: - return ir.FPExp2NotReduced(op_a); + return ir.FPExp2(op_a); case Operation::Lg2: return ir.FPLog2(op_a); case Operation::Rcp: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 5c38d3fc1..edf2cadae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -55,9 +55,6 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode if (cc) { throw NotImplementedException("FMUL CC"); } - if (sat) { - throw NotImplementedException("FMUL SAT"); - } IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { @@ -71,7 +68,20 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); } void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -83,27 +93,33 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; - } fmul{insn}; - + } const fmul{insn}; FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetRegFloat20(insn)); + return FMUL(*this, insn, GetFloatReg20(insn)); } -void TranslatorVisitor::FMUL_cbuf(u64) { - throw NotImplementedException("FMUL (cbuf)"); +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FMUL_imm(u64) { - throw NotImplementedException("FMUL (imm)"); +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FMUL32I(u64) { - throw NotImplementedException("FMUL32I"); +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); } } // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 
000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 165d475b9..a5a0e1a9b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -110,6 +110,14 @@ IR::U32 TranslatorVisitor::GetImm32(u64 insn) { return ir.Imm32(static_cast(imm.value)); } +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) 
{ + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast(static_cast(imm.value))); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4d4cf2ebf..4e722e205 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,8 +304,8 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); @@ -314,6 +314,7 @@ public: [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d8a5158b5..20af68852 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -50,7 +50,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { // const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; - result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; } v.X(shl.dest_reg, result); } diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 628cf1c14..4114e10be 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -721,18 +721,6 @@ void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } -void TranslatorVisitor::RRO_reg(u64) { - ThrowNotImplemented(Opcode::RRO_reg); -} - -void TranslatorVisitor::RRO_cbuf(u64) { - ThrowNotImplemented(Opcode::RRO_cbuf); -} - -void TranslatorVisitor::RRO_imm(u64) { - ThrowNotImplemented(Opcode::RRO_imm); -} - void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } -- cgit v1.2.3 From 9d6a98d950da39dd2a7ca5ad25525de4fb825415 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Feb 2021 04:46:39 -0300 Subject: shader: Implement more of XMAD and FFMA32I and fix XMAD.CBCC --- .../impl/floating_point_fused_multiply_add.cpp | 41 +++++++++++++----- .../translate/impl/floating_point_multiply.cpp | 2 + .../translate/impl/integer_short_multiply_add.cpp | 49 ++++++++++++++++------ 3 files changed, 70 insertions(+), 22 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index c2ca0873b..18561bc9c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -17,9 +17,6 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s BitField<8, 8, IR::Reg> src_a; } const ffma{insn}; - if (sat) { - throw NotImplementedException("FFMA SAT"); - } if (cc) { throw NotImplementedException("FFMA CC"); } @@ -31,7 
+28,20 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); } void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { @@ -54,20 +64,31 @@ void TranslatorVisitor::FFMA_reg(u64 insn) { FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_rc(u64) { - throw NotImplementedException("FFMA (rc)"); +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); } void TranslatorVisitor::FFMA_cr(u64 insn) { FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_imm(u64) { - throw NotImplementedException("FFMA (imm)"); +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA32I(u64) { - throw NotImplementedException("FFMA32I"); +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), 
ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index edf2cadae..72f0a18ae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -94,6 +94,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; } const fmul{insn}; + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } @@ -118,6 +119,7 @@ void TranslatorVisitor::FMUL32I(u64 insn) { BitField<53, 2, FmzMode> fmz; BitField<55, 1, u64> sat; } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, fmul32i.sat != 0, fmul32i.cc != 0, false); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp index 70a7c76c5..2932cdc42 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -58,7 +58,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s case SelectMode::CHI: return ExtractHalf(v, src_c, Half::H1, false); case SelectMode::CBCC: - return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); case SelectMode::CSFU: throw NotImplementedException("XMAD CSFU"); } @@ -78,16 +78,44 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } } // 
Anonymous namespace -void TranslatorVisitor::XMAD_reg(u64) { - throw NotImplementedException("XMAD (reg)"); +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } -void TranslatorVisitor::XMAD_rc(u64) { - throw NotImplementedException("XMAD (rc)"); +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); } -void TranslatorVisitor::XMAD_cr(u64) { - throw NotImplementedException("XMAD (cr)"); +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } void TranslatorVisitor::XMAD_imm(u64 insn) { @@ -97,14 +125,11 @@ void TranslatorVisitor::XMAD_imm(u64 insn) { BitField<36, 1, u64> psl; BitField<37, 1, u64> mrg; BitField<38, 1, u64> x; - BitField<39, 8, IR::Reg> src_c; BitField<50, 3, SelectMode> select_mode; } const xmad{insn}; - const IR::U32 src_b{ir.Imm32(static_cast(xmad.src_b))}; - const IR::U32 src_c{X(xmad.src_c)}; - XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, - xmad.x != 0); + XMAD(*this, insn, ir.Imm32(static_cast(xmad.src_b)), GetReg39(insn), xmad.select_mode, + Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); } } // namespace 
Shader::Maxwell -- cgit v1.2.3 From e87a502da2d5a8356a639d53c0a16a77890de4c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 05:21:30 -0300 Subject: shader: Fix control flow --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 2 +- src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | 2 +- .../frontend/maxwell/translate/impl/not_implemented.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e766b555b..52be41b84 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -354,7 +354,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4e722e205..672e140b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -209,7 +209,7 @@ public: void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); void PBK(); - void PCNT(u64 insn); + void PCNT(); void PEXIT(u64 insn); void PIXLD(u64 insn); void PLONGJMP(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4114e10be..3f6dedfdd 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -637,8 +637,8 @@ void TranslatorVisitor::PBK() { // PBK is a no-op } -void TranslatorVisitor::PCNT(u64) { - ThrowNotImplemented(Opcode::PCNT); +void TranslatorVisitor::PCNT() { + // PCNT is a no-op } void TranslatorVisitor::PEXIT(u64) { -- cgit v1.2.3 From 622d676202bad317a58529efc3c15d08fd04aad1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 18:32:00 -0300 Subject: shader: Fix conditional execution of exit instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 +++++---- src/shader_recompiler/frontend/maxwell/control_flow.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 52be41b84..d0dc66330 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -322,12 +322,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond) { + EndClass insn_end_class, IR::Condition cond, + bool visit_conditional_inst) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -354,7 +355,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = 
std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc + 1; + conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; @@ -423,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 8179787b8..209c9e551 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -128,7 +128,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond); + IR::Condition cond, bool visit_conditional_inst); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, -- cgit v1.2.3 From 8810c88b7e3de2766bf47e07e941fb2c58c6b4b0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 24 Feb 2021 20:31:15 -0500 Subject: shader: Implement SEL --- .../maxwell/translate/impl/not_implemented.cpp | 12 ------ .../impl/select_source_with_predicate.cpp | 44 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 12 deletions(-) create mode 
100644 src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3f6dedfdd..82c73bf8c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -729,18 +729,6 @@ void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } -void TranslatorVisitor::SEL_reg(u64) { - ThrowNotImplemented(Opcode::SEL_reg); -} - -void TranslatorVisitor::SEL_cbuf(u64) { - ThrowNotImplemented(Opcode::SEL_cbuf); -} - -void TranslatorVisitor::SEL_imm(u64) { - ThrowNotImplemented(Opcode::SEL_imm); -} - void TranslatorVisitor::SETCRSPTR(u64) { ThrowNotImplemented(Opcode::SETCRSPTR); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..25fc6b437 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + } const sel{insn}; + + const IR::U1 pred = v.ir.GetPred(sel.pred); + IR::U32 op_a{v.X(sel.op_a)}; + IR::U32 op_b{src}; + if (sel.neg_pred != 0) { + std::swap(op_a, op_b); + } + const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; + + v.X(sel.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SEL_reg(u64 insn) { + SEL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SEL_cbuf(u64 insn) { + SEL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SEL_imm(u64 insn) { + SEL(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell -- cgit v1.2.3 From cc55d289494c991e7e0e456e428a110569708c2e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Feb 2021 00:46:40 -0500 Subject: shader: Implement SHR --- .../maxwell/translate/impl/integer_shift_right.cpp | 62 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 2 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..a34ccb851 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file 
included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> is_wrapped; + BitField<40, 1, u64> brev; + BitField<43, 1, u64> xmode; + BitField<48, 1, u64> is_arithmetic; + } const shr{insn}; + + if (shr.xmode != 0) { + throw NotImplementedException("SHR.XMODE"); + } + + IR::U32 base{v.X(shr.src_reg_a)}; + if (shr.brev == 1) { + base = v.ir.BitReverse(base); + } + IR::U32 result; + const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); + if (shr.is_arithmetic == 1) { + result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; + } else { + result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; + } + + if (shr.is_wrapped == 0) { + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 safe_bits{v.ir.Imm32(32)}; + + const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; + const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; + const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; + } + v.X(shr.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHR_reg(u64 insn) { + SHR(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHR_cbuf(u64 insn) { + SHR(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHR_imm(u64 insn) { + SHR(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 82c73bf8c..45ed04e25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -757,18 +757,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHR_reg(u64) { - ThrowNotImplemented(Opcode::SHR_reg); -} - -void TranslatorVisitor::SHR_cbuf(u64) { - ThrowNotImplemented(Opcode::SHR_cbuf); -} - -void TranslatorVisitor::SHR_imm(u64) { - ThrowNotImplemented(Opcode::SHR_imm); -} - void TranslatorVisitor::SSY() { // SSY is a no-op } -- cgit v1.2.3 From a8c41c50d3f7a1c2871487862f68925db8b5e27f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 26 Feb 2021 21:41:46 -0500 Subject: shader: Implement POPC --- .../maxwell/translate/impl/integer_popcount.cpp | 36 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -------- 2 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + } const popc{insn}; + + const IR::U32 operand = popc.tilde == 0 ? 
src : v.ir.BitwiseNot(src); + const IR::U32 result = v.ir.BitCount(operand); + v.X(popc.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::POPC_reg(u64 insn) { + POPC(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::POPC_cbuf(u64 insn) { + POPC(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::POPC_imm(u64 insn) { + POPC(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 45ed04e25..127686b43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -653,18 +653,6 @@ void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } -void TranslatorVisitor::POPC_reg(u64) { - ThrowNotImplemented(Opcode::POPC_reg); -} - -void TranslatorVisitor::POPC_cbuf(u64) { - ThrowNotImplemented(Opcode::POPC_cbuf); -} - -void TranslatorVisitor::POPC_imm(u64) { - ThrowNotImplemented(Opcode::POPC_imm); -} - void TranslatorVisitor::PRET(u64) { ThrowNotImplemented(Opcode::PRET); } -- cgit v1.2.3 From 34ac9b4d7e71198503d7fca88c0494e1f97701e7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Feb 2021 01:45:13 -0500 Subject: shader: Implement BFE --- .../maxwell/translate/impl/bitfield_extract.cpp | 66 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..4a03e6939 --- 
/dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_reg; + BitField<40, 1, u64> brev; + BitField<48, 1, u64> is_signed; + } const bfe{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + + // Common constants + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 one{v.ir.Imm32(1)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + // Edge case conditions + const IR::U1 zero_count{v.ir.IEqual(count, zero)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; + const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; + + IR::U32 base{v.X(bfe.offset_reg)}; + if (bfe.brev != 0) { + base = v.ir.BitReverse(base); + } + IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; + if (bfe.is_signed != 0) { + const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; + const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; + // Replicate condition + result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; + // Exceeding condition + const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; + result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; + } + // Zero count condition + result = 
IR::U32{v.ir.Select(zero_count, zero, result)}; + + v.X(bfe.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::BFE_reg(u64 insn) { + BFE(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::BFE_cbuf(u64 insn) { + BFE(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::BFE_imm(u64 insn) { + BFE(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 127686b43..3714f5f4f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -49,18 +49,6 @@ void TranslatorVisitor::BAR(u64) { ThrowNotImplemented(Opcode::BAR); } -void TranslatorVisitor::BFE_reg(u64) { - ThrowNotImplemented(Opcode::BFE_reg); -} - -void TranslatorVisitor::BFE_cbuf(u64) { - ThrowNotImplemented(Opcode::BFE_cbuf); -} - -void TranslatorVisitor::BFE_imm(u64) { - ThrowNotImplemented(Opcode::BFE_imm); -} - void TranslatorVisitor::BFI_reg(u64) { ThrowNotImplemented(Opcode::BFI_reg); } -- cgit v1.2.3 From 08a9e95905fa90d9d2455c9aedf66cebcfc6f6ba Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Feb 2021 16:37:49 -0500 Subject: shader: Implement BFI --- .../maxwell/translate/impl/bitfield_insert.cpp | 56 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 ------- 2 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..ee312c30d --- /dev/null +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> insert_reg; + } const bfi{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + + // Edge case conditions + const IR::U1 zero_offset{v.ir.IEqual(offset, v.ir.Imm32(0))}; + const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(unsafe_count, max_size, false)}; + + const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; + const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; + + const IR::U32 insert{v.X(bfi.insert_reg)}; + IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; + + result = IR::U32{v.ir.Select(exceed_offset, base, result)}; + result = IR::U32{v.ir.Select(zero_offset, base, result)}; + + v.X(bfi.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::BFI_reg(u64 insn) { + BFI(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_rc(u64 insn) { + BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::BFI_cr(u64 insn) { + BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_imm(u64 insn) { + BFI(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3714f5f4f..ed2cfac60 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -49,22 +49,6 @@ void TranslatorVisitor::BAR(u64) { ThrowNotImplemented(Opcode::BAR); } -void TranslatorVisitor::BFI_reg(u64) { - ThrowNotImplemented(Opcode::BFI_reg); -} - -void TranslatorVisitor::BFI_rc(u64) { - ThrowNotImplemented(Opcode::BFI_rc); -} - -void TranslatorVisitor::BFI_cr(u64) { - ThrowNotImplemented(Opcode::BFI_cr); -} - -void TranslatorVisitor::BFI_imm(u64) { - ThrowNotImplemented(Opcode::BFI_imm); -} - void TranslatorVisitor::BPT(u64) { ThrowNotImplemented(Opcode::BPT); } -- cgit v1.2.3 From 20390c0548d6eef2af67a363ee120a630267b741 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Feb 2021 23:33:53 -0500 Subject: shader: Implement IMNMX --- .../translate/impl/integer_minimum_maximum.cpp | 59 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 2 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..12c6aae3d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 2, u64> mode; + BitField<48, 1, u64> is_signed; + } const imnmx{insn}; + + if (imnmx.mode != 0) { + throw NotImplementedException("IMNMX.MODE"); + } + + IR::U1 pred = v.ir.GetPred(imnmx.pred); + const IR::U32 op_a{v.X(imnmx.src_reg)}; + IR::U32 min; + IR::U32 max; + + if (imnmx.is_signed != 0) { + min = IR::U32{v.ir.SMin(op_a, op_b)}; + max = IR::U32{v.ir.SMax(op_a, op_b)}; + } else { + min = IR::U32{v.ir.UMin(op_a, op_b)}; + max = IR::U32{v.ir.UMax(op_a, op_b)}; + } + if (imnmx.neg_pred != 0) { + std::swap(min, max); + } + + const IR::U32 result{v.ir.Select(pred, min, max)}; + v.X(imnmx.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IMNMX_reg(u64 insn) { + IMNMX(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IMNMX_cbuf(u64 insn) { + IMNMX(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IMNMX_imm(u64 insn) { + IMNMX(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ed2cfac60..615e3c3b5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -453,18 +453,6 @@ void TranslatorVisitor::IMADSP_imm(u64) { ThrowNotImplemented(Opcode::IMADSP_imm); } -void TranslatorVisitor::IMNMX_reg(u64) { - ThrowNotImplemented(Opcode::IMNMX_reg); -} - -void TranslatorVisitor::IMNMX_cbuf(u64) { - 
ThrowNotImplemented(Opcode::IMNMX_cbuf); -} - -void TranslatorVisitor::IMNMX_imm(u64) { - ThrowNotImplemented(Opcode::IMNMX_imm); -} - void TranslatorVisitor::IMUL_reg(u64) { ThrowNotImplemented(Opcode::IMUL_reg); } -- cgit v1.2.3 From bce0b1dcca4e83ab8bb6692e98d021ded5c0ad5f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 00:18:30 -0500 Subject: shader: Implement ICMP --- .../maxwell/translate/impl/integer_compare.cpp | 83 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 ----- 2 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..1f604b0ee --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ComparisonOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +[[nodiscard]] IR::U1 CompareToZero(TranslatorVisitor& v, const IR::U32& operand, + ComparisonOp compare_op, bool is_signed) { + const IR::U32 zero{v.ir.Imm32(0)}; + switch (compare_op) { + case ComparisonOp::False: + return v.ir.Imm1(false); + case ComparisonOp::LessThan: + return v.ir.ILessThan(operand, zero, is_signed); + case ComparisonOp::Equal: + return v.ir.IEqual(operand, zero); + case ComparisonOp::LessThanEqual: + return v.ir.ILessThanEqual(operand, zero, is_signed); + case ComparisonOp::GreaterThan: + return v.ir.IGreaterThan(operand, zero, is_signed); + case ComparisonOp::NotEqual: + return v.ir.INotEqual(operand, zero); + case ComparisonOp::GreaterThanEqual: + return v.ir.IGreaterThanEqual(operand, zero, is_signed); + case ComparisonOp::True: + return v.ir.Imm1(true); + default: + throw NotImplementedException("ICMP.CMP"); + } +} + +void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<48, 1, u64> is_signed; + BitField<49, 3, ComparisonOp> compare_op; + } const icmp{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const bool is_signed{icmp.is_signed != 0}; + const IR::U1 cmp_result{CompareToZero(v, operand, icmp.compare_op, is_signed)}; + + const IR::U32 src_reg{v.X(icmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(icmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ICMP_reg(u64 insn) { + ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_rc(u64 insn) { + ICMP(*this, insn, GetReg39(insn), 
GetCbuf(insn)); +} + +void TranslatorVisitor::ICMP_cr(u64 insn) { + ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_imm(u64 insn) { + ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 615e3c3b5..8d4044ee8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -389,22 +389,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::ICMP_reg(u64) { - ThrowNotImplemented(Opcode::ICMP_reg); -} - -void TranslatorVisitor::ICMP_rc(u64) { - ThrowNotImplemented(Opcode::ICMP_rc); -} - -void TranslatorVisitor::ICMP_cr(u64) { - ThrowNotImplemented(Opcode::ICMP_cr); -} - -void TranslatorVisitor::ICMP_imm(u64) { - ThrowNotImplemented(Opcode::ICMP_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From bec7d3111d3de2a7a8384b1e761bc3692afef9c7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 00:25:15 -0500 Subject: shader: Make IMNMX, SHR, SEL stylistically more consistent --- .../frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | 2 +- .../frontend/maxwell/translate/impl/integer_shift_right.cpp | 4 ++-- .../frontend/maxwell/translate/impl/select_source_with_predicate.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 12c6aae3d..5303db612 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -23,7 +23,7 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { throw NotImplementedException("IMNMX.MODE"); } - IR::U1 pred = v.ir.GetPred(imnmx.pred); + IR::U1 pred{v.ir.GetPred(imnmx.pred)}; const IR::U32 op_a{v.X(imnmx.src_reg)}; IR::U32 min; IR::U32 max; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp index a34ccb851..4025b1358 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -16,7 +16,7 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { BitField<39, 1, u64> is_wrapped; BitField<40, 1, u64> brev; BitField<43, 1, u64> xmode; - BitField<48, 1, u64> is_arithmetic; + BitField<48, 1, u64> is_signed; } const shr{insn}; if (shr.xmode != 0) { @@ -29,7 +29,7 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { } IR::U32 result; const IR::U32 safe_shift = shr.is_wrapped == 0 ? 
shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); - if (shr.is_arithmetic == 1) { + if (shr.is_signed == 1) { result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; } else { result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp index 25fc6b437..93baa75a9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -13,13 +13,13 @@ void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> op_a; + BitField<8, 8, IR::Reg> src_reg; BitField<39, 3, IR::Pred> pred; BitField<42, 1, u64> neg_pred; } const sel{insn}; const IR::U1 pred = v.ir.GetPred(sel.pred); - IR::U32 op_a{v.X(sel.op_a)}; + IR::U32 op_a{v.X(sel.src_reg)}; IR::U32 op_b{src}; if (sel.neg_pred != 0) { std::swap(op_a, op_b); -- cgit v1.2.3 From e0389286165805258fa2e54014c2dc506ffb9f35 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 01:30:09 -0500 Subject: shader: Implement ISET, add common_funcs --- .../maxwell/translate/impl/common_funcs.cpp | 46 ++++++++++++++++ .../frontend/maxwell/translate/impl/common_funcs.h | 17 ++++++ .../frontend/maxwell/translate/impl/impl.h | 19 +++++++ .../maxwell/translate/impl/integer_compare.cpp | 39 +------------- .../translate/impl/integer_compare_and_set.cpp | 62 ++++++++++++++++++++++ .../translate/impl/integer_minimum_maximum.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 7 files changed, 147 insertions(+), 50 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp create mode 100644 
src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..3ec146b1a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, + const IR::U32& operand_2, ComparisonOp compare_op, + bool is_signed) { + switch (compare_op) { + case ComparisonOp::False: + return v.ir.Imm1(false); + case ComparisonOp::LessThan: + return v.ir.ILessThan(operand_1, operand_2, is_signed); + case ComparisonOp::Equal: + return v.ir.IEqual(operand_1, operand_2); + case ComparisonOp::LessThanEqual: + return v.ir.ILessThanEqual(operand_1, operand_2, is_signed); + case ComparisonOp::GreaterThan: + return v.ir.IGreaterThan(operand_1, operand_2, is_signed); + case ComparisonOp::NotEqual: + return v.ir.INotEqual(operand_1, operand_2); + case ComparisonOp::GreaterThanEqual: + return v.ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case ComparisonOp::True: + return v.ir.Imm1(true); + default: + throw NotImplementedException("CMP"); + } +} + +[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop) { + switch (bop) { + case BooleanOp::And: + return v.ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::Or: + return v.ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::Xor: + return 
v.ir.LogicalXor(predicate_1, predicate_2); + default: + throw NotImplementedException("BOP"); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..293fcce2e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -0,0 +1,17 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, + const IR::U32& operand_2, ComparisonOp compare_op, + bool is_signed); + +[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop); +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 672e140b2..232f8c894 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#pragma once + #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -9,6 +11,23 @@ namespace Shader::Maxwell { +enum class ComparisonOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +enum class BooleanOp : u64 { + And, + Or, + Xor, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp index 1f604b0ee..d844974d8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -4,46 +4,11 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { -enum class ComparisonOp : u64 { - False, - LessThan, - Equal, - LessThanEqual, - GreaterThan, - NotEqual, - GreaterThanEqual, - True, -}; - -[[nodiscard]] IR::U1 CompareToZero(TranslatorVisitor& v, const IR::U32& operand, - ComparisonOp compare_op, bool is_signed) { - const IR::U32 zero{v.ir.Imm32(0)}; - switch (compare_op) { - case ComparisonOp::False: - return v.ir.Imm1(false); - case ComparisonOp::LessThan: - return v.ir.ILessThan(operand, zero, is_signed); - case ComparisonOp::Equal: - return v.ir.IEqual(operand, zero); - case ComparisonOp::LessThanEqual: - return v.ir.ILessThanEqual(operand, zero, is_signed); - case ComparisonOp::GreaterThan: - return v.ir.IGreaterThan(operand, zero, is_signed); - case ComparisonOp::NotEqual: - return v.ir.INotEqual(operand, zero); - case ComparisonOp::GreaterThanEqual: - return 
v.ir.IGreaterThanEqual(operand, zero, is_signed); - case ComparisonOp::True: - return v.ir.Imm1(true); - default: - throw NotImplementedException("ICMP.CMP"); - } -} - void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { union { u64 insn; @@ -55,7 +20,7 @@ void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& o const IR::U32 zero{v.ir.Imm32(0)}; const bool is_signed{icmp.is_signed != 0}; - const IR::U1 cmp_result{CompareToZero(v, operand, icmp.compare_op, is_signed)}; + const IR::U1 cmp_result{IntegerCompare(v, operand, zero, icmp.compare_op, is_signed)}; const IR::U32 src_reg{v.X(icmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..b6a7b593d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> x; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, ComparisonOp> compare_op; + } const iset{insn}; + + if (iset.x != 0) { + throw NotImplementedException("ISET.X"); + } + + const IR::U32 src_reg{v.X(iset.src_reg)}; + const bool is_signed{iset.is_signed != 0}; + IR::U1 pred{v.ir.GetPred(iset.pred)}; + if (iset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{IntegerCompare(v, src_reg, src_a, iset.compare_op, is_signed)}; + const IR::U1 bop_result{PredicateCombine(v, cmp_result, pred, iset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{iset.bf == 0 ? 
one_mask : fp_one}; + + const IR::U32 result{v.ir.Select(bop_result, pass_result, fail_result)}; + + v.X(iset.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ISET_reg(u64 insn) { + ISET(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISET_cbuf(u64 insn) { + ISET(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISET_imm(u64 insn) { + ISET(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 5303db612..40f14ab8a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -23,7 +23,7 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { throw NotImplementedException("IMNMX.MODE"); } - IR::U1 pred{v.ir.GetPred(imnmx.pred)}; + const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; const IR::U32 op_a{v.X(imnmx.src_reg)}; IR::U32 min; IR::U32 max; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 8d4044ee8..f327e6fa5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -457,18 +457,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISET_reg(u64) { - ThrowNotImplemented(Opcode::ISET_reg); -} - -void TranslatorVisitor::ISET_cbuf(u64) { - ThrowNotImplemented(Opcode::ISET_cbuf); -} - -void TranslatorVisitor::ISET_imm(u64) { - ThrowNotImplemented(Opcode::ISET_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } -- cgit v1.2.3 From 103b9da4f7115ff47eee52d0dbd31b5b7a18b257 Mon Sep 17 
00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 15:58:16 -0500 Subject: shader: Implement FLO --- .../maxwell/translate/impl/find_leading_one.cpp | 46 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ------ 2 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..d5361bec5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FLO(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + BitField<41, 1, u64> shift; + BitField<48, 1, u64> is_signed; + } const flo{insn}; + + const bool invert{flo.tilde != 0}; + const bool is_signed{flo.is_signed != 0}; + const bool shift_op{flo.shift != 0}; + + const IR::U32 operand{invert ? v.ir.BitwiseNot(src) : src}; + const IR::U32 find_result{is_signed ? v.ir.FindSMsb(operand) : v.ir.FindUMsb(operand)}; + const IR::U1 find_fail{v.ir.IEqual(find_result, v.ir.Imm32(-1))}; + const IR::U32 offset{v.ir.Imm32(31)}; + const IR::U32 success_result{shift_op ? 
IR::U32{v.ir.ISub(offset, find_result)} : find_result}; + + const IR::U32 result{v.ir.Select(find_fail, find_result, success_result)}; + v.X(flo.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FLO_reg(u64 insn) { + FLO(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FLO_cbuf(u64 insn) { + FLO(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::FLO_imm(u64 insn) { + FLO(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index f327e6fa5..2da0b87c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -217,18 +217,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FLO_reg(u64) { - ThrowNotImplemented(Opcode::FLO_reg); -} - -void TranslatorVisitor::FLO_cbuf(u64) { - ThrowNotImplemented(Opcode::FLO_cbuf); -} - -void TranslatorVisitor::FLO_imm(u64) { - ThrowNotImplemented(Opcode::FLO_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } -- cgit v1.2.3 From ce9b116cfe4fcd96df889ed8997c93c6cd2a502c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 01:05:57 -0500 Subject: Implement PSET, refactor common comparison funcs --- .../maxwell/translate/impl/common_funcs.cpp | 54 +++++++++--------- .../frontend/maxwell/translate/impl/common_funcs.h | 7 +-- .../frontend/maxwell/translate/impl/impl.h | 8 +-- .../maxwell/translate/impl/integer_compare.cpp | 4 +- .../translate/impl/integer_compare_and_set.cpp | 6 +- .../translate/impl/integer_set_predicate.cpp | 64 +++------------------- .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../maxwell/translate/impl/predicate_set.cpp | 41 
++++++++++++++ 8 files changed, 87 insertions(+), 101 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 3ec146b1a..62f825a92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -5,42 +5,42 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, - const IR::U32& operand_2, ComparisonOp compare_op, +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed) { switch (compare_op) { - case ComparisonOp::False: - return v.ir.Imm1(false); - case ComparisonOp::LessThan: - return v.ir.ILessThan(operand_1, operand_2, is_signed); - case ComparisonOp::Equal: - return v.ir.IEqual(operand_1, operand_2); - case ComparisonOp::LessThanEqual: - return v.ir.ILessThanEqual(operand_1, operand_2, is_signed); - case ComparisonOp::GreaterThan: - return v.ir.IGreaterThan(operand_1, operand_2, is_signed); - case ComparisonOp::NotEqual: - return v.ir.INotEqual(operand_1, operand_2); - case ComparisonOp::GreaterThanEqual: - return v.ir.IGreaterThanEqual(operand_1, operand_2, is_signed); - case ComparisonOp::True: - return v.ir.Imm1(true); + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return ir.ILessThan(operand_1, operand_2, is_signed); + case CompareOp::Equal: + return ir.IEqual(operand_1, operand_2); + case CompareOp::LessThanEqual: + return ir.ILessThanEqual(operand_1, operand_2, is_signed); + case CompareOp::GreaterThan: + return ir.IGreaterThan(operand_1, 
operand_2, is_signed); + case CompareOp::NotEqual: + return ir.INotEqual(operand_1, operand_2); + case CompareOp::GreaterThanEqual: + return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case CompareOp::True: + return ir.Imm1(true); default: - throw NotImplementedException("CMP"); + throw NotImplementedException("Invalid compare op {}", compare_op); } } -[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { - case BooleanOp::And: - return v.ir.LogicalAnd(predicate_1, predicate_2); - case BooleanOp::Or: - return v.ir.LogicalOr(predicate_1, predicate_2); - case BooleanOp::Xor: - return v.ir.LogicalXor(predicate_1, predicate_2); + case BooleanOp::AND: + return ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::OR: + return ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::XOR: + return ir.LogicalXor(predicate_1, predicate_2); default: - throw NotImplementedException("BOP"); + throw NotImplementedException("Invalid bop {}", bop); } } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index 293fcce2e..61e13fa18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -8,10 +8,9 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, - const IR::U32& operand_2, ComparisonOp compare_op, - bool is_signed); +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed); -[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, 
const IR::U1& predicate_1, +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 232f8c894..ad09ade7c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { -enum class ComparisonOp : u64 { +enum class CompareOp : u64 { False, LessThan, Equal, @@ -23,9 +23,9 @@ enum class ComparisonOp : u64 { }; enum class BooleanOp : u64 { - And, - Or, - Xor, + AND, + OR, + XOR, }; class TranslatorVisitor { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp index d844974d8..ba6e01926 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -15,12 +15,12 @@ void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& o BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; BitField<48, 1, u64> is_signed; - BitField<49, 3, ComparisonOp> compare_op; + BitField<49, 3, CompareOp> compare_op; } const icmp{insn}; const IR::U32 zero{v.ir.Imm32(0)}; const bool is_signed{icmp.is_signed != 0}; - const IR::U1 cmp_result{IntegerCompare(v, operand, zero, icmp.compare_op, is_signed)}; + const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; const IR::U32 src_reg{v.X(icmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 
b6a7b593d..914af010f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -20,7 +20,7 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; - BitField<49, 3, ComparisonOp> compare_op; + BitField<49, 3, CompareOp> compare_op; } const iset{insn}; if (iset.x != 0) { @@ -33,8 +33,8 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (iset.neg_pred != 0) { pred = v.ir.LogicalNot(pred); } - const IR::U1 cmp_result{IntegerCompare(v, src_reg, src_a, iset.compare_op, is_signed)}; - const IR::U1 bop_result{PredicateCombine(v, cmp_result, pred, iset.bop)}; + const IR::U1 cmp_result{IntegerCompare(v.ir, src_reg, src_a, iset.compare_op, is_signed)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 1bc9ef363..7743701d0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -4,62 +4,11 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { -enum class CompareOp : u64 { - F, // Always false - LT, // Less than - EQ, // Equal - LE, // Less than or equal - GT, // Greater than - NE, // Not equal - GE, // Greater than or equal - T, // Always true -}; - -enum class Bop : u64 { - AND, - OR, - XOR, -}; - -IR::U1 
Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, - bool is_signed) { - switch (op) { - case CompareOp::F: - return ir.Imm1(false); - case CompareOp::LT: - return ir.ILessThan(lhs, rhs, is_signed); - case CompareOp::EQ: - return ir.IEqual(lhs, rhs); - case CompareOp::LE: - return ir.ILessThanEqual(lhs, rhs, is_signed); - case CompareOp::GT: - return ir.IGreaterThan(lhs, rhs, is_signed); - case CompareOp::NE: - return ir.INotEqual(lhs, rhs); - case CompareOp::GE: - return ir.IGreaterThanEqual(lhs, rhs, is_signed); - case CompareOp::T: - return ir.Imm1(true); - } - throw NotImplementedException("Invalid ISETP compare op {}", op); -} - -IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { - switch (bop) { - case Bop::AND: - return ir.LogicalAnd(comparison, bop_pred); - case Bop::OR: - return ir.LogicalOr(comparison, bop_pred); - case Bop::XOR: - return ir.LogicalXor(comparison, bop_pred); - } - throw NotImplementedException("Invalid ISETP bop {}", bop); -} - void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -68,17 +17,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; - BitField<45, 2, Bop> bop; + BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; - const Bop bop{isetp.bop}; + const BooleanOp bop{isetp.bop}; + const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; - const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; - const IR::U1 result_b{Combine(v.ir, 
bop, v.ir.LogicalNot(comparison), bop_pred)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; v.ir.SetPred(isetp.dest_pred_a, result_a); v.ir.SetPred(isetp.dest_pred_b, result_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2da0b87c4..291d7a4bc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -593,10 +593,6 @@ void TranslatorVisitor::PRMT_imm(u64) { ThrowNotImplemented(Opcode::PRMT_imm); } -void TranslatorVisitor::PSET(u64) { - ThrowNotImplemented(Opcode::PSET); -} - void TranslatorVisitor::PSETP(u64) { ThrowNotImplemented(Opcode::PSETP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp new file mode 100644 index 000000000..6c15963fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? 
ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 false_result{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + + X(pset.dest_reg, result); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From c2155f04d4220b71432b046694983963036ab6e2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 13:42:09 -0500 Subject: shader: Implement PSETP --- .../maxwell/translate/impl/not_implemented.cpp | 4 --- .../maxwell/translate/impl/predicate_set.cpp | 41 ---------------------- .../translate/impl/predicate_set_predicate.cpp | 38 ++++++++++++++++++++ .../translate/impl/predicate_set_register.cpp | 41 ++++++++++++++++++++++ 4 files changed, 79 insertions(+), 45 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 291d7a4bc..91a9858c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -593,10 +593,6 @@ void TranslatorVisitor::PRMT_imm(u64) { ThrowNotImplemented(Opcode::PRMT_imm); } -void TranslatorVisitor::PSETP(u64) { - ThrowNotImplemented(Opcode::PSETP); -} - void TranslatorVisitor::R2B(u64) { ThrowNotImplemented(Opcode::R2B); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp deleted file mode 100644 index 6c15963fa..000000000 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -void TranslatorVisitor::PSET(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<12, 3, IR::Pred> pred_a; - BitField<15, 1, u64> neg_pred_a; - BitField<24, 2, BooleanOp> bop_1; - BitField<29, 3, IR::Pred> pred_b; - BitField<32, 1, u64> neg_pred_b; - BitField<39, 3, IR::Pred> pred_c; - BitField<42, 1, u64> neg_pred_c; - BitField<44, 1, u64> bf; - BitField<45, 2, BooleanOp> bop_2; - } const pset{insn}; - - const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; - const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; - const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; - - const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; - const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; - - const IR::U32 true_result{pset.bf != 0 ? 
ir.Imm32(0x3f800000) : ir.Imm32(-1)}; - const IR::U32 false_result{ir.Imm32(0)}; - - const IR::U32 result{ir.Select(res_2, true_result, false_result)}; - - X(pset.dest_reg, result); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; + const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; + const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; + + ir.SetPred(pset.dest_pred_a, result_a); + ir.SetPred(pset.dest_pred_b, result_b); 
+} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..6c15963fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? 
ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 false_result{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + + X(pset.dest_reg, result); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 382cba94ed52f4fae7db437a3056563ba2110e8b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 14:59:28 -0500 Subject: shader: Implement IADD3 --- .../translate/impl/integer_add_three_input.cpp | 103 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 2 files changed, 103 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..c2dbd7998 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -0,0 +1,103 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Shift : u64 { + None, + Right, + Left, +}; +enum class Half : u64 { + All, + Lower, + Upper, +}; + +[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { + constexpr bool is_signed{false}; + switch (half) { + case Half::Lower: + return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); + case Half::Upper: + return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); + default: + return value; + } +} + +[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { + switch (shift) { + case Shift::Right: + return ir.ShiftRightLogical(value, ir.Imm32(16)); + case Shift::Left: + return ir.ShiftLeftLogical(value, ir.Imm32(16)); + default: + return value; + } +} + +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + BitField<31, 2, Half> half_c; + BitField<33, 2, Half> half_b; + BitField<35, 2, Half> half_a; + BitField<37, 2, Shift> shift; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> x; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3{insn}; + + if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X"); + } + if (iadd3.cc != 0) { + throw NotImplementedException("IADD3 CC"); + } + + IR::U32 op_a{v.X(iadd3.src_a)}; + op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); + op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); + op_c = IntegerHalf(v.ir, op_c, iadd3.half_c); + + if (iadd3.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iadd3.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + if (iadd3.neg_c != 0) { + op_c = v.ir.INeg(op_c); + } + + IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; + lhs = IntegerShift(v.ir, lhs, iadd3.shift); + const IR::U32 
result{v.ir.IAdd(lhs, op_c)}; + + v.X(iadd3.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD3_reg(u64 insn) { + IADD3(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_cbuf(u64 insn) { + IADD3(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_imm(u64 insn) { + IADD3(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 91a9858c6..c93304a67 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -365,18 +365,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD3_reg(u64) { - ThrowNotImplemented(Opcode::IADD3_reg); -} - -void TranslatorVisitor::IADD3_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD3_cbuf); -} - -void TranslatorVisitor::IADD3_imm(u64) { - ThrowNotImplemented(Opcode::IADD3_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From 980cafdc27444484a2a2794be5de92ea18de6e27 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 3 Mar 2021 00:41:05 -0500 Subject: shader: Implement LOP and LOP3 --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/common_funcs.cpp | 25 ++++- .../frontend/maxwell/translate/impl/common_funcs.h | 2 + .../frontend/maxwell/translate/impl/impl.h | 7 ++ .../maxwell/translate/impl/logic_operation.cpp | 77 ++++++++++++++ .../translate/impl/logic_operation_three_input.cpp | 117 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 24 ----- 7 files changed, 225 insertions(+), 31 deletions(-) create mode 100644 
src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1515285bf..5d0b91598 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -178,8 +178,8 @@ INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") -INST(LOP3_cbuf, "LOP3 (cbuf)", "0011 11-- ---- ----") -INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") INST(LOP32I, "LOP32I", "0000 01-- ---- ----") INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 62f825a92..9d4ac2e36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -5,9 +5,8 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, - const IR::U32& operand_2, CompareOp compare_op, - bool is_signed) { +IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { switch (compare_op) { case CompareOp::False: return ir.Imm1(false); @@ -30,8 +29,8 @@ namespace Shader::Maxwell { } } -[[nodiscard]] IR::U1 
PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, - const IR::U1& predicate_2, BooleanOp bop) { +IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, + BooleanOp bop) { switch (bop) { case BooleanOp::AND: return ir.LogicalAnd(predicate_1, predicate_2); @@ -43,4 +42,20 @@ namespace Shader::Maxwell { throw NotImplementedException("Invalid bop {}", bop); } } + +IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { + switch (op) { + case PredicateOp::False: + return ir.Imm1(false); + case PredicateOp::True: + return ir.Imm1(true); + case PredicateOp::Zero: + return ir.IEqual(result, ir.Imm32(0)); + case PredicateOp::NonZero: + return ir.INotEqual(result, ir.Imm32(0)); + default: + throw NotImplementedException("Invalid Predicate operation {}", op); + } +} + } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index 61e13fa18..c9ae5c500 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -13,4 +13,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); + +[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ad09ade7c..c6253c40c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -28,6 +28,13 @@ enum class BooleanOp : u64 { XOR, }; +enum class PredicateOp : u64 { + False, + True, + Zero, + NonZero, +}; + class TranslatorVisitor { public: 
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..e786a388e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -0,0 +1,77 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LogicalOp : u64 { + AND, + OR, + XOR, + PASS_B, +}; + +[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, LogicalOp op) { + switch (op) { + case LogicalOp::AND: + return ir.BitwiseAnd(operand_1, operand_2); + case LogicalOp::OR: + return ir.BitwiseOr(operand_1, operand_2); + case LogicalOp::XOR: + return ir.BitwiseXor(operand_1, operand_2); + case LogicalOp::PASS_B: + return operand_2; + default: + throw NotImplementedException("Invalid Logical operation {}", op); + } +} + +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 1, u64> neg_a; + BitField<40, 1, u64> neg_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop{insn}; + + if (lop.x != 0) { + throw NotImplementedException("LOP X"); + } + IR::U32 op_a{v.X(lop.src_reg)}; + if (lop.neg_a != 0) { + op_a = v.ir.BitwiseNot(op_a); + } + if (lop.neg_b != 0) { + op_b = v.ir.BitwiseNot(op_b); + } + + const IR::U32 result{LogicalOperation(v.ir, op_a, 
op_b, lop.bit_op)}; + const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)}; + v.X(lop.dest_reg, result); + v.ir.SetPred(lop.pred, pred_result); +} +} // Anonymous namespace + +void TranslatorVisitor::LOP_reg(u64 insn) { + LOP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LOP_cbuf(u64 insn) { + LOP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LOP_imm(u64 insn) { + LOP(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..256c47504 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -0,0 +1,117 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 +// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) +IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, + u64 ttbl) { + IR::U32 r{ir.Imm32(0)}; + const IR::U32 not_a{ir.BitwiseNot(a)}; + const IR::U32 not_b{ir.BitwiseNot(b)}; + const IR::U32 not_c{ir.BitwiseNot(c)}; + if (ttbl & 0x01) { + // r |= ~a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x02) { + // r |= ~a & ~b & c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x04) { + // r |= ~a & 
b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x08) { + // r |= ~a & b & c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x10) { + // r |= a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x20) { + // r |= a & ~b & c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x40) { + // r |= a & b & ~c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x80) { + // r |= a & b & c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + return r; +} + +IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + } const lop3{insn}; + + const IR::U32 op_a{v.X(lop3.src_reg)}; + const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; + v.X(lop3.dest_reg, result); + return result; +} + +u64 GetLut48(u64 insn) { + union { + u64 raw; + BitField<48, 8, u64> lut; + } const lut{insn}; + return lut.lut; +} +} // Anonymous namespace + +void TranslatorVisitor::LOP3_reg(u64 insn) { + union { + u64 insn; + BitField<28, 8, u64> lut; + BitField<38, 1, u64> x; + BitField<36, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop3{insn}; + + if (lop3.x != 0) { + throw NotImplementedException("LOP3 X"); + } + const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; + const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; + ir.SetPred(lop3.pred, pred_result); +} + +void TranslatorVisitor::LOP3_cbuf(u64 insn) { + 
LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); +} + +void TranslatorVisitor::LOP3_imm(u64 insn) { + LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c93304a67..a0535f1c2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -493,30 +493,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::LOP_reg(u64) { - ThrowNotImplemented(Opcode::LOP_reg); -} - -void TranslatorVisitor::LOP_cbuf(u64) { - ThrowNotImplemented(Opcode::LOP_cbuf); -} - -void TranslatorVisitor::LOP_imm(u64) { - ThrowNotImplemented(Opcode::LOP_imm); -} - -void TranslatorVisitor::LOP3_reg(u64) { - ThrowNotImplemented(Opcode::LOP3_reg); -} - -void TranslatorVisitor::LOP3_cbuf(u64) { - ThrowNotImplemented(Opcode::LOP3_cbuf); -} - -void TranslatorVisitor::LOP3_imm(u64) { - ThrowNotImplemented(Opcode::LOP3_imm); -} - void TranslatorVisitor::LOP32I(u64) { ThrowNotImplemented(Opcode::LOP32I); } -- cgit v1.2.3 From 4006929c986a2e0e52429fe21201a7ad5ca3fea9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Mar 2021 03:07:19 -0300 Subject: shader: Implement HADD2 --- .../translate/impl/half_floating_point_add.cpp | 184 +++++++++++++++++++++ .../maxwell/translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 16 -- 3 files changed, 185 insertions(+), 17 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..6965adfb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool h0{merge == Merge::MRG_H0}; + const IR::F16& insert{h0 ? 
lhs : rhs}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hadd2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<32, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, Swizzle> swizzle_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + GetReg20(insn)); +} + +void TranslatorVisitor::HADD2_cbuf(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> abs_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + GetCbuf(insn)); +} + +void TranslatorVisitor::HADD2_imm(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hadd2{insn}; + + const u32 imm{static_cast(hadd2.low << 6) | 
((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HADD2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hadd2{insn}; + + const u32 imm{static_cast(hadd2.imm32)}; + HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, + hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 727524284..748b856c9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) { const IR::U64 address{[&]() -> IR::U64 { if (mem.e == 0) { // LDG/STG without .E uses a 32-bit pointer, zero-extend it - return v.ir.ConvertU(64, v.X(mem.addr_reg)); + return v.ir.UConvert(64, v.X(mem.addr_reg)); } if (!IR::IsAligned(mem.addr_reg, 2)) { throw NotImplementedException("Unaligned address register"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0535f1c2..c24f29ff7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void 
TranslatorVisitor::HADD2_reg(u64) { - ThrowNotImplemented(Opcode::HADD2_reg); -} - -void TranslatorVisitor::HADD2_cbuf(u64) { - ThrowNotImplemented(Opcode::HADD2_cbuf); -} - -void TranslatorVisitor::HADD2_imm(u64) { - ThrowNotImplemented(Opcode::HADD2_imm); -} - -void TranslatorVisitor::HADD2_32I(u64) { - ThrowNotImplemented(Opcode::HADD2_32I); -} - void TranslatorVisitor::HFMA2_reg(u64) { ThrowNotImplemented(Opcode::HFMA2_reg); } -- cgit v1.2.3 From 81f72471e831a0bc4205df6df61e5b510a5c25ac Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 4 Mar 2021 01:02:44 -0500 Subject: shader: Implement I2I --- .../impl/integer_to_integer_conversion.cpp | 99 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 2 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..ca28c6dd9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class IntegerWidth : u64 { + Byte, + Short, + Word, +}; + +[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { + switch (width) { + case IntegerWidth::Byte: + return ir.Imm32(8); + case IntegerWidth::Short: + return ir.Imm32(16); + case IntegerWidth::Word: + return ir.Imm32(32); + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, + IntegerWidth dst_width) { + const IR::U32 zero{ir.Imm32(0)}; + switch (dst_width) { + case IntegerWidth::Byte: + return ir.BitFieldExtract(src, zero, ir.Imm32(8), false); + case IntegerWidth::Short: + return ir.BitFieldExtract(src, zero, ir.Imm32(16), false); + case IntegerWidth::Word: + return ir.BitFieldExtract(src, zero, ir.Imm32(32), false); + default: + throw NotImplementedException("Invalid width {}", dst_width); + } +} + +void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, IntegerWidth> dst_fmt; + BitField<12, 1, u64> dst_fmt_sign; + BitField<10, 2, IntegerWidth> src_fmt; + BitField<13, 1, u64> src_fmt_sign; + BitField<41, 3, u64> selector; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + BitField<50, 1, u64> sat; + } const i2i{insn}; + + if (i2i.sat != 0) { + throw NotImplementedException("I2I SAT"); + } + if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { + throw NotImplementedException("16-bit source format incompatible with selector {}", + i2i.selector); + } + if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { + throw NotImplementedException("32-bit source format incompatible with selector {}", + i2i.selector); + } + + const s32 selector{static_cast(i2i.selector)}; + const IR::U32 
offset{v.ir.Imm32(selector * 8)}; + const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, i2i.src_fmt_sign != 0)}; + if (i2i.abs) { + src_values = v.ir.IAbs(src_values); + } + if (i2i.neg) { + src_values = v.ir.INeg(src_values); + } + + const IR::U32 result{ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; + v.X(i2i.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::I2I_reg(u64 insn) { + I2I(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::I2I_cbuf(u64 insn) { + I2I(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::I2I_imm(u64 insn) { + I2I(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c24f29ff7..bd7a7a8b7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -337,18 +337,6 @@ void TranslatorVisitor::I2F_imm(u64) { ThrowNotImplemented(Opcode::I2F_imm); } -void TranslatorVisitor::I2I_reg(u64) { - ThrowNotImplemented(Opcode::I2I_reg); -} - -void TranslatorVisitor::I2I_cbuf(u64) { - ThrowNotImplemented(Opcode::I2I_cbuf); -} - -void TranslatorVisitor::I2I_imm(u64) { - ThrowNotImplemented(Opcode::I2I_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From d1edc16ba87f3247ad220042050bfea2999067ff Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 4 Mar 2021 20:12:44 -0300 Subject: shader: Deduplicate HADD2 code --- .../translate/impl/half_floating_point_add.cpp | 35 ++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 6965adfb3..c292d5e87 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -107,54 +107,52 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool } v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); } -} // Anonymous namespace -void TranslatorVisitor::HADD2_reg(u64 insn) { +void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, + const IR::U32& src_b) { union { u64 raw; BitField<49, 2, Merge> merge; BitField<39, 1, u64> ftz; - BitField<32, 1, u64> sat; BitField<43, 1, u64> neg_a; BitField<44, 1, u64> abs_a; BitField<47, 2, Swizzle> swizzle_a; + } const hadd2{insn}; + + HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, + hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; BitField<31, 1, u64> neg_b; BitField<30, 1, u64> abs_b; BitField<28, 2, Swizzle> swizzle_b; } const hadd2{insn}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, GetReg20(insn)); } void TranslatorVisitor::HADD2_cbuf(u64 insn) { union { u64 raw; - BitField<49, 2, Merge> merge; - BitField<39, 1, u64> ftz; BitField<52, 1, u64> sat; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, Swizzle> swizzle_a; BitField<56, 1, u64> neg_b; BitField<54, 1, u64> abs_b; } const hadd2{insn}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, 
hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, GetCbuf(insn)); } void TranslatorVisitor::HADD2_imm(u64 insn) { union { u64 raw; - BitField<49, 2, Merge> merge; - BitField<39, 1, u64> ftz; BitField<52, 1, u64> sat; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, Swizzle> swizzle_a; BitField<56, 1, u64> neg_high; BitField<30, 9, u64> high; BitField<29, 1, u64> neg_low; @@ -163,8 +161,7 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); + HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } void TranslatorVisitor::HADD2_32I(u64 insn) { -- cgit v1.2.3 From 5465cb156107a27df525dfedbfd4e920b7f71253 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 5 Mar 2021 01:15:16 -0500 Subject: shader: Implement LEA --- .../translate/impl/load_effective_address.cpp | 100 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 20 ----- 2 files changed, 100 insertions(+), 20 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..784588e83 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// 
Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, + bool neg, bool x) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + + if (x) { + throw NotImplementedException("LEA.HI X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; + const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; + + const s32 hi_scale{32 - static_cast(scale)}; + const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; + const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; + v.X(lea.dest_reg, result); +} + +void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<39, 5, u64> scale; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> x; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + if (lea.x != 0) { + throw NotImplementedException("LEA.LO X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const s32 scale{static_cast(lea.scale)}; + const IR::U32 offset{lea.neg != 0 ? 
IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; + const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset)}; + v.X(lea.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::LEA_hi_reg(u64 insn) { + union { + u64 insn; + BitField<28, 5, u64> scale; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { + union { + u64 insn; + BitField<51, 5, u64> scale; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_lo_reg(u64 insn) { + LEA_lo(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { + LEA_lo(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LEA_lo_imm(u64 insn) { + LEA_lo(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index bd7a7a8b7..62863aff6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -437,26 +437,6 @@ void TranslatorVisitor::LDS(u64) { ThrowNotImplemented(Opcode::LDS); } -void TranslatorVisitor::LEA_hi_reg(u64) { - ThrowNotImplemented(Opcode::LEA_hi_reg); -} - -void TranslatorVisitor::LEA_hi_cbuf(u64) { - ThrowNotImplemented(Opcode::LEA_hi_cbuf); -} - -void TranslatorVisitor::LEA_lo_reg(u64) { - ThrowNotImplemented(Opcode::LEA_lo_reg); -} - -void TranslatorVisitor::LEA_lo_cbuf(u64) { - ThrowNotImplemented(Opcode::LEA_lo_cbuf); -} - -void TranslatorVisitor::LEA_lo_imm(u64) { - ThrowNotImplemented(Opcode::LEA_lo_imm); -} 
- void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } -- cgit v1.2.3 From 924f0a9149b6777782347be3d2c833a5f8e90058 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 7 Mar 2021 14:48:03 -0500 Subject: shader: Implement SHF --- .../translate/impl/integer_funnel_shift.cpp | 77 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 ----- 2 files changed, 77 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..d8d6c939e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -0,0 +1,77 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class MaxShift : u64 { + U32, + Undefined, + U64, + S64, +}; + +IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, + bool right_shift, bool is_signed) { + if (!right_shift) { + return ir.ShiftLeftLogical(packed_int, safe_shift); + } + if (is_signed) { + return ir.ShiftRightArithmetic(packed_int, safe_shift); + } + return ir.ShiftRightLogical(packed_int, safe_shift); +} + +void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, + bool right_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<0, 8, IR::Reg> lo_bits_reg; + BitField<37, 2, MaxShift> max_shift; + BitField<48, 2, u64> x_mode; + BitField<50, 1, u64> wrap; + } const shf{insn}; + if (shf.x_mode != 0) { + throw NotImplementedException("SHF X Mode"); + } + if (shf.max_shift == MaxShift::Undefined) { + throw NotImplementedException("SHF Use of undefined MaxShift value"); + } + const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; + const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; + const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; + const IR::U32 safe_shift{shf.wrap != 0 + ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) + : v.ir.UMin(shift, max_shift)}; + + const bool is_signed{shf.max_shift == MaxShift::S64}; + const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; + const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; + + const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 
0 : 1)}; + v.X(shf.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHF_l_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_l_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_r_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); +} + +void TranslatorVisitor::SHF_r_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 62863aff6..2ab90d1bf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -553,22 +553,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { ThrowNotImplemented(Opcode::SETLMEMBASE); } -void TranslatorVisitor::SHF_l_reg(u64) { - ThrowNotImplemented(Opcode::SHF_l_reg); -} - -void TranslatorVisitor::SHF_l_imm(u64) { - ThrowNotImplemented(Opcode::SHF_l_imm); -} - -void TranslatorVisitor::SHF_r_reg(u64) { - ThrowNotImplemented(Opcode::SHF_r_reg); -} - -void TranslatorVisitor::SHF_r_imm(u64) { - ThrowNotImplemented(Opcode::SHF_r_imm); -} - void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -- cgit v1.2.3 From 7d6ba5b9840a4ba00a9b0f207c1c119d60dcf8b7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 7 Mar 2021 22:01:22 -0500 Subject: shader: Implement R2P --- .../translate/impl/move_register_to_predicate.cpp | 71 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 2 files changed, 71 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff 
--git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; + +void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { + switch (index) { + case 0: + return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); + case 1: + return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); + case 2: + return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); + case 3: + return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); + default: + throw LogicError("Unreachable R2P index"); + } +} + +void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { + union { + u64 raw; + BitField<8, 8, IR::Reg> src_reg; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const r2p{insn}; + const IR::U32 src{v.X(r2p.src_reg)}; + const IR::U32 count{v.ir.Imm32(1)}; + const bool pr_mode{r2p.mode == Mode::PR}; + const u32 num_items{pr_mode ? 
7U : 4U}; + const u32 offset_base{static_cast(r2p.byte_selector) * 8}; + for (u32 index = 0; index < num_items; ++index) { + const IR::U32 offset{v.ir.Imm32(offset_base + index)}; + const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; + const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; + const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; + const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; + if (pr_mode) { + const IR::Pred pred{index}; + v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); + } else { + SetFlag(v.ir, inv_mask_bit, src_bit, index); + } + } +} +} // Anonymous namespace + +void TranslatorVisitor::R2P_reg(u64 insn) { + R2P(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::R2P_cbuf(u64 insn) { + R2P(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::R2P_imm(u64 insn) { + R2P(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2ab90d1bf..fc6030e04 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -513,18 +513,6 @@ void TranslatorVisitor::R2B(u64) { ThrowNotImplemented(Opcode::R2B); } -void TranslatorVisitor::R2P_reg(u64) { - ThrowNotImplemented(Opcode::R2P_reg); -} - -void TranslatorVisitor::R2P_cbuf(u64) { - ThrowNotImplemented(Opcode::R2P_cbuf); -} - -void TranslatorVisitor::R2P_imm(u64) { - ThrowNotImplemented(Opcode::R2P_imm); -} - void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- 
src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_sample.cpp | 232 +++++++++++++++++++++ 4 files changed, 235 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") INST(SYNC, "SYNC", "1111 0000 1111 1---") -INST(TEX, "TEX", "1100 00-- --11 1---") -INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + 
+Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 
MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value 
coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? 
IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 3a63fa0477ea8297c80133d35494e1dfdc012f95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 9 Mar 2021 17:14:57 -0300 Subject: shader: Partial implementation of LDC --- .../frontend/maxwell/translate/impl/impl.cpp | 16 +++- .../maxwell/translate/impl/load_constant.cpp | 85 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 4 - 3 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index a5a0e1a9b..7564aeeb2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -56,25 +56,32 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } -IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { +static std::pair CbufAddr(u64 insn) { union { u64 raw; 
BitField<20, 14, s64> offset; BitField<34, 5, u64> binding; } const cbuf{insn}; + if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } - const IR::U32 binding{ir.Imm32(static_cast(cbuf.binding))}; - const IR::U32 byte_offset{ir.Imm32(static_cast(cbuf.offset) * 4)}; + const IR::Value binding{static_cast(cbuf.binding)}; + const IR::Value byte_offset{static_cast(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto[binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { - return ir.BitCast(GetCbuf(insn)); + const auto[binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); } IR::U32 TranslatorVisitor::GetImm20(u64 insn) { @@ -83,6 +90,7 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; + if (imm.is_negative != 0) { const s64 raw{static_cast(imm.value)}; return ir.Imm32(static_cast(-(1LL << 19) + raw)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..39becf93c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,85 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; + } const ldc{insn}; + + const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + break; + case Size::S8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + break; + case Size::U16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + break; + case Size::S16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + break; + case Size::B32: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace 
Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ff429c126..5b153acff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -425,10 +425,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDC(u64) { - ThrowNotImplemented(Opcode::LDC); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } -- cgit v1.2.3 From ba8c1d2eb479d04b2b0d847efd67468b688765d4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 10 Mar 2021 22:42:17 -0500 Subject: shader: Implement FCMP still need to configure some settings for NV denorm flush and intel NaN --- .../translate/impl/floating_point_compare.cpp | 116 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 --- 2 files changed, 116 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..21cb80d67 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + 
union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; + IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 5b153acff..e1904472f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,22 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FCMP_reg(u64) { - ThrowNotImplemented(Opcode::FCMP_reg); -} - -void TranslatorVisitor::FCMP_rc(u64) { - ThrowNotImplemented(Opcode::FCMP_rc); -} - -void TranslatorVisitor::FCMP_cr(u64) { - ThrowNotImplemented(Opcode::FCMP_cr); -} - -void TranslatorVisitor::FCMP_imm(u64) { - ThrowNotImplemented(Opcode::FCMP_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } -- cgit v1.2.3 From 
8d470c2e63c2dac334ccff2bcda9a0607ce76377 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:23:56 -0500 Subject: shader: Implement FMNMX And add a const in FCMP --- .../translate/impl/floating_point_compare.cpp | 2 +- .../translate/impl/floating_point_min_max.cpp | 57 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 3 files changed, 58 insertions(+), 13 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index 21cb80d67..f254ecb3a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -88,7 +88,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None}}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c3180a9bd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e1904472f..01ecbb4cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,18 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FMNMX_reg(u64) { - ThrowNotImplemented(Opcode::FMNMX_reg); -} - -void TranslatorVisitor::FMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::FMNMX_cbuf); -} - -void TranslatorVisitor::FMNMX_imm(u64) { - ThrowNotImplemented(Opcode::FMNMX_imm); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } -- cgit v1.2.3 From 71f96fa6366dc6dd306a953bca1b958fb32bc55a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Mar 2021 03:41:05 -0300 Subject: shader: Implement CAL inlining function calls --- .../frontend/maxwell/control_flow.cpp | 78 +-- .../frontend/maxwell/control_flow.h | 19 +- src/shader_recompiler/frontend/maxwell/program.cpp | 71 +- .../frontend/maxwell/structured_control_flow.cpp | 770 +++++++++++++++++++++ .../frontend/maxwell/structured_control_flow.h | 24 + .../frontend/maxwell/translate/impl/impl.h | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 
4 +- 7 files changed, 869 insertions(+), 99 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/structured_control_flow.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index d0dc66330..715c0e92d 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -31,13 +31,12 @@ struct Compare { return lhs.begin < rhs.begin; } }; -} // Anonymous namespace -static u32 BranchOffset(Location pc, Instruction inst) { +u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static void Split(Block* old_block, Block* new_block, Location pc) { +void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } @@ -49,21 +48,19 @@ static void Split(Block* old_block, Block* new_block, Location pc) { .cond{old_block->cond}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, - .ir{nullptr}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, .stack{std::move(old_block->stack)}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{new_block}, .branch_false{nullptr}, - .ir{nullptr}, }; } -static Token OpcodeToken(Opcode opcode) { +Token OpcodeToken(Opcode opcode) { switch (opcode) { case Opcode::PBK: case Opcode::BRK: @@ -89,7 +86,7 @@ static Token OpcodeToken(Opcode opcode) { } } -static bool IsAbsoluteJump(Opcode opcode) { +bool IsAbsoluteJump(Opcode opcode) { switch (opcode) { case Opcode::JCAL: case Opcode::JMP: @@ -100,7 +97,7 @@ static bool IsAbsoluteJump(Opcode opcode) { } } -static bool HasFlowTest(Opcode opcode) { +bool HasFlowTest(Opcode opcode) { 
switch (opcode) { case Opcode::BRA: case Opcode::BRX: @@ -121,13 +118,14 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string NameOf(const Block& block) { +std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } } +} // Anonymous namespace void Stack::Push(Token token, Location target) { entries.push_back({ @@ -166,26 +164,24 @@ bool Block::Contains(Location pc) const noexcept { return pc >= begin && pc < end; } -Function::Function(Location start_address) +Function::Function(ObjectPool& block_pool, Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block{nullptr}, + .block{block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{true}, + .branch_true{nullptr}, + .branch_false{nullptr}, + })}, .stack{}, }} {} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_} { - functions.emplace_back(start_address); - functions.back().labels.back().block = block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .stack{}, - .cond{IR::Condition{true}}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .ir{nullptr}, - }); + functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -308,11 +304,17 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Location cal_pc{is_absolute ? 
inst.branch.Absolute() : BranchOffset(pc, inst)}; // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist - if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.emplace_back(cal_pc); + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + const bool exists{it != functions.end()}; + const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + if (!exists) { + functions.emplace_back(block_pool, cal_pc); } - // Handle CAL like a regular instruction - break; + block->end_class = EndClass::Call; + block->function_call = call_id; + block->return_block = AddLabel(block, block->stack, pc + 1, function_id); + block->end = pc; + return AnalysisState::Branch; } default: break; @@ -348,7 +350,6 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block}, .branch_false{nullptr}, - .ir{nullptr}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -401,16 +402,6 @@ void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } -void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { - const Location cal_pc{is_absolute ? 
inst.branch.Absolute() : BranchOffset(pc, inst)}; - // Technically CAL pushes into PRET, but that's implicit in the function call for us - // Insert the function to the function list if it doesn't exist - const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; - if (it == functions.end()) { - functions.emplace_back(cal_pc); - } -} - CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; @@ -455,10 +446,9 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .end{pc}, .end_class{EndClass::Branch}, .stack{stack}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{nullptr}, .branch_false{nullptr}, - .ir{nullptr}, })}; function.labels.push_back(Label{ .address{pc}, @@ -495,6 +485,14 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::Call: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); + dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", + node_uid, block.function_call); + dot += '\n'; + ++node_uid; + break; case EndClass::Exit: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 209c9e551..fe74f210f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -20,16 +20,13 @@ #include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/object_pool.h" -namespace Shader::IR { -class Block; -} - namespace Shader::Maxwell::Flow { using FunctionId = size_t; enum class EndClass { Branch, + Call, Exit, Return, }; @@ -75,9 +72,14 @@ struct Block : 
boost::intrusive::set_base_hook< EndClass end_class; Stack stack; IR::Condition cond; - Block* branch_true; - Block* branch_false; - IR::Block* ir; + union { + Block* branch_true; + FunctionId function_call; + }; + union { + Block* branch_false; + Block* return_block; + }; }; struct Label { @@ -87,7 +89,7 @@ struct Label { }; struct Function { - Function(Location start_address); + explicit Function(ObjectPool& block_pool, Location start_address); Location entrypoint; boost::container::small_vector labels; @@ -137,7 +139,6 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); - void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b270bbccd..8bfa64326 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,67 +8,44 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { -namespace { -IR::BlockList TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::Function& cfg_function) { - const size_t num_blocks{cfg_function.blocks.size()}; - std::vector blocks(cfg_function.blocks.size()); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - 
const u32 begin{cfg_block.begin.Offset()}; - const u32 end{cfg_block.end.Offset()}; - blocks[i] = block_pool.Create(inst_pool, begin, end); - cfg_block.ir = blocks[i]; - ++i; - }); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - IR::Block* const block{blocks[i]}; - ++i; - if (cfg_block.end_class != Flow::EndClass::Branch) { - block->SetReturn(); - } else if (cfg_block.cond == IR::Condition{true}) { - block->SetBranch(cfg_block.branch_true->ir); - } else if (cfg_block.cond == IR::Condition{false}) { - block->SetBranch(cfg_block.branch_false->ir); - } else { - block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, - cfg_block.branch_false->ir); - } + +static void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const IR::BlockList& post_order{program.post_order_blocks}; + std::erase_if(program.blocks, [&](IR::Block* block) { + return std::ranges::find(post_order, block) == post_order.end(); }); - return IR::VisitAST(inst_pool, block_pool, blocks, - [&](IR::Block* block) { Translate(env, block); }); } -} // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - auto& functions{program.functions}; - functions.reserve(cfg.Functions().size()); - for (Flow::Function& cfg_function : cfg.Functions()) { - functions.push_back(IR::Function{ - .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, - .post_order_blocks{}, - }); - } + program.blocks = VisitAST(inst_pool, block_pool, env, cfg); + program.post_order_blocks = PostOrder(program.blocks); + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite Optimization::LowerFp16ToFp32(program); - for 
(IR::Function& function : functions) { - function.post_order_blocks = PostOrder(function.blocks); - Optimization::SsaRewritePass(function.post_order_blocks); - } + + Optimization::SsaRewritePass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - for (IR::Function& function : functions) { - Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); - Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); - Optimization::IdentityRemovalPass(function); - Optimization::VerificationPass(function); - } + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::IdentityRemovalPass(program); + Optimization::VerificationPass(program); Optimization::CollectShaderInfoPass(program); return program; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..5f5d9cf17 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -0,0 +1,770 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook>; + +using Tree = boost::intrusive::list, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, 
Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + IR::Block* code; + Node label; + Tree children; + IR::Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return ""; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + 
case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, 
StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + return false; +} + +class GotoPass { +public: + explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, + ObjectPool& block_pool_, ObjectPool& stmt_pool) + : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + std::vector gotos{BuildTree(cfg)}; + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using 
outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + { + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + } + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector BuildTree(Flow::CFG& cfg) { + u32 label_id{0}; + std::vector gotos; + Flow::Function& first_function{cfg.Functions().front()}; + BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), 
std::nullopt); + return gotos; + } + + void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, + std::vector& gotos, Node function_insert_point, + std::optional return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; + Tree& root{root_stmt.children}; + std::unordered_map local_labels; + local_labels.reserve(function.blocks.size()); + + for (Flow::Block& block : function.blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); + ++label_id; + } + for (Flow::Block& block : function.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + + const u32 begin_offset{block.begin.Offset()}; + const u32 end_offset{block.end.Offset()}; + IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; + root.insert(ip, *pool.Create(ir_block, &root_stmt)); + + switch (block.end_class) { + case Flow::EndClass::Branch: { + Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + if (block.cond == IR::Condition{true}) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition{false}) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node 
false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case Flow::EndClass::Call: { + Flow::Function& call{cfg.Functions()[block.function_call]}; + const Node call_return_label{local_labels.at(block.return_block)}; + BuildTree(cfg, call, label_id, gotos, ip, call_return_label); + break; + } + case Flow::EndClass::Exit: + root.insert(ip, *pool.Create(Return{})); + break; + case Flow::EndClass::Return: { + Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + break; + } + } + } + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return 
MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + switch (label_nested_stmt->type) { + case StatementType::If: + // Update nested if condition + label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, 
label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, 
cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool& inst_pool; + ObjectPool& block_pool; + ObjectPool& pool; + Statement root_stmt{FunctionTag{}}; +}; + +IR::Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, + ObjectPool& stmt_pool_, Environment& env_, Statement& root_stmt, + IR::BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); 
+ } + +private: + void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + Tree& tree{parent.children}; + IR::Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IR::IREmitter{*current_block}.Branch(stmt.code); + } + current_block = stmt.code; + Translate(env, stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + IR::Block* const first_if_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*current_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IR::IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + 
block_list.push_back(new_continue_block); + + // Implement loop header block + IR::Block* const first_loop_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IR::IREmitter continue_ir{*new_continue_block}; + const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::Block* const skip_block{MergeBlock(parent, stmt)}; + + IR::IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IR::IREmitter{*current_block}.Branch(continue_block); + } + } + + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + if (IR::Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + IR::Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool& stmt_pool; + ObjectPool& inst_pool; + ObjectPool& block_pool; + Environment& env; + IR::BlockList& block_list; +}; +} // Anonymous namespace + +IR::BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { + ObjectPool 
stmt_pool{64}; + GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::BlockList block_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; + return block_list; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..e4797291e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -0,0 +1,24 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::BlockList VisitAST(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c6253c40c..45d6f5e06 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -62,7 +62,7 @@ public: void BRA(u64 insn); void BRK(u64 insn); void BRX(u64 insn); - void CAL(u64 insn); + void CAL(); void CCTL(u64 insn); void CCTLL(u64 insn); void CONT(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 01ecbb4cc..92da5c7e8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,8 +65,8 @@ void TranslatorVisitor::BRX(u64) { ThrowNotImplemented(Opcode::BRX); } -void TranslatorVisitor::CAL(u64) { - ThrowNotImplemented(Opcode::CAL); +void TranslatorVisitor::CAL() { + // CAL is a no-op } void TranslatorVisitor::CCTL(u64) { -- cgit v1.2.3 From 17a82b56d74afcebaad78ce4754d8ee99ea66f93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Mar 2021 04:54:43 -0300 Subject: shader: Implement TEXS --- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../maxwell/translate/impl/texture_fetch.cpp | 232 ++++++++++++++++++ .../translate/impl/texture_fetch_swizzled.cpp | 262 +++++++++++++++++++++ .../maxwell/translate/impl/texture_sample.cpp | 232 ------------------ 4 files changed, 494 insertions(+), 236 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 92da5c7e8..9aa7b836c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -553,10 +553,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TEXS(u64) { - ThrowNotImplemented(Opcode::TEXS); -} - void TranslatorVisitor::TLD(u64) { ThrowNotImplemented(Opcode::TLD); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..98d9f4c64 --- /dev/null +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -0,0 +1,232 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? 
Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + 
case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 
1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..ac1615b00 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -0,0 +1,262 @@ +// Copyright 2021 yuzu Emulator Project +// 
Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<53, 4, u64> encoding; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +template +IR::Value Composite(TranslatorVisitor& v, Args... 
regs) { + return v.ir.CompositeConstruct(v.F(regs)...); +} + +IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { + return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding texs{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::Reg reg_a{texs.src_reg_a}; + const IR::Reg reg_b{texs.src_reg_b}; + IR::TextureInstInfo info{}; + if (texs.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + switch (texs.encoding) { + case 0: // 1D.LZ + info.type.Assign(TextureType::Color1D); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info); + case 1: // 2D + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); + case 2: // 2D.LZ + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info); + case 3: // 2D.LL + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, + {}, info); + case 4: // 2D.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + {}, {}, {}, info); + case 5: // 2D.LL.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), + v.F(reg_b + 1), v.F(reg_b), {}, {}, info); + case 6: // 2D.LZ.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + zero, {}, {}, info); + case 7: // ARRAY_2D + CheckAlignment(reg_a, 2); + 
info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleImplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + {}, {}, {}, info); + case 8: // ARRAY_2D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + zero, {}, {}, info); + case 9: // ARRAY_2D.LZ.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ShadowArray2D); + return v.ir.ImageSampleDrefExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + v.F(reg_b + 1), zero, {}, {}, info); + case 10: // 3D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 11: // 3D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, + {}, info); + case 12: // CUBE + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 13: // CUBE.LL + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), + v.F(reg_b + 1), {}, {}, info); + default: + throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); + } +} + +unsigned Swizzle(u64 insn) { + const Encoding texs{insn}; + const size_t encoding{texs.swizzle}; + if (texs.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw 
NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + const bool is_shadow{sample.Type() == IR::Type::F32}; + if (is_shadow) { + const bool is_alpha{component == 3}; + return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; + } else { + return IR::F32{v.ir.CompositeExtract(sample, component)}; + } +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding texs{insn}; + switch (index) { + case 0: + return texs.dest_reg_a; + case 1: + CheckAlignment(texs.dest_reg_a, 2); + return texs.dest_reg_a + 1; + case 2: + return texs.dest_reg_b; + case 3: + CheckAlignment(texs.dest_reg_b, 2); + return texs.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding texs{insn}; + switch (store_index) { + case 1: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch 
(store_index) { + case 2: + break; + case 3: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEXS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp deleted file mode 100644 index 98d9f4c64..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/ir/modifiers.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -namespace { -enum class Blod : u64 { - None, - LZ, - LB, - LL, - INVALIDBLOD4, - INVALIDBLOD5, - LBA, - LLA, -}; - -enum class TextureType : u64 { - _1D, - ARRAY_1D, - _2D, - ARRAY_2D, - _3D, - ARRAY_3D, - CUBE, - ARRAY_CUBE, -}; - -Shader::TextureType GetType(TextureType type, bool dc) { - switch (type) { - case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; - case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; - case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; - case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; - case TextureType::_3D: - return dc ? 
Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; - case TextureType::ARRAY_3D: - throw NotImplementedException("3D array texture type"); - case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; - case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; - } - throw NotImplementedException("Invalid texture type {}", type); -} - -IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; - switch (type) { - case TextureType::_1D: - return v.F(reg); - case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); - case TextureType::_2D: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); - case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); - case TextureType::_3D: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); - case TextureType::ARRAY_3D: - throw NotImplementedException("3D array texture type"); - case TextureType::CUBE: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); - case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); - } - throw NotImplementedException("Invalid texture type {}", type); -} - -IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { - switch (blod) { - case Blod::None: - return v.ir.Imm32(0.0f); - case Blod::LZ: - return v.ir.Imm32(0.0f); - case Blod::LB: - case Blod::LL: - case Blod::LBA: - case Blod::LLA: - return v.F(reg++); - case Blod::INVALIDBLOD4: - case Blod::INVALIDBLOD5: - break; - } - throw NotImplementedException("Invalid blod {}", blod); -} - -IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { - const IR::U32 value{v.X(reg++)}; - switch (type) { - case TextureType::_1D: - case 
TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); - case TextureType::_2D: - case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); - case TextureType::_3D: - case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); - case TextureType::CUBE: - case TextureType::ARRAY_CUBE: - throw NotImplementedException("Illegal offset on CUBE sample"); - } - throw NotImplementedException("Invalid texture type {}", type); -} - -bool HasExplicitLod(Blod blod) { - switch (blod) { - case Blod::LL: - case Blod::LLA: - case Blod::LZ: - return true; - default: - return false; - } -} - -void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, - std::optional cbuf_offset) { - union { - u64 raw; - BitField<35, 1, u64> ndv; - BitField<49, 1, u64> nodep; - BitField<50, 1, u64> dc; - BitField<51, 3, IR::Pred> sparse_pred; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> coord_reg; - BitField<20, 8, IR::Reg> meta_reg; - BitField<28, 3, TextureType> type; - BitField<31, 4, u64> mask; - } const tex{insn}; - - if (lc) { - throw NotImplementedException("LC"); - } - const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; - - IR::Reg meta_reg{tex.meta_reg}; - IR::Value handle; - IR::Value offset; - IR::F32 dref; - IR::F32 lod_clamp; - if (cbuf_offset) { - handle = v.ir.Imm32(*cbuf_offset); - } else { - handle = v.X(meta_reg++); - } - const IR::F32 lod{MakeLod(v, meta_reg, blod)}; - if (aoffi) { - offset = MakeOffset(v, meta_reg, tex.type); - } - if (tex.dc != 0) { - dref = v.F(meta_reg++); - } - IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); - info.has_bias.Assign(blod == Blod::LB 
|| blod == Blod::LBA ? 1 : 0); - info.has_lod_clamp.Assign(lc ? 1 : 0); - - const IR::Value sample{[&]() -> IR::Value { - if (tex.dc == 0) { - if (HasExplicitLod(blod)) { - return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); - } else { - return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); - } - } - if (HasExplicitLod(blod)) { - return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); - } else { - return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); - } - }()}; - - for (int element = 0; element < 4; ++element) { - if (((tex.mask >> element) & 1) == 0) { - continue; - } - IR::F32 value; - if (tex.dc != 0) { - value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); - } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; - } - v.F(tex.dest_reg + element, value); - } - if (tex.sparse_pred != IR::Pred::PT) { - v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); - } -} -} // Anonymous namespace - -void TranslatorVisitor::TEX(u64 insn) { - union { - u64 raw; - BitField<54, 1, u64> aoffi; - BitField<55, 3, Blod> blod; - BitField<58, 1, u64> lc; - BitField<36, 13, u64> cbuf_offset; - } const tex{insn}; - - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); -} - -void TranslatorVisitor::TEX_b(u64 insn) { - union { - u64 raw; - BitField<36, 1, u64> aoffi; - BitField<37, 3, Blod> blod; - BitField<40, 1, u64> lc; - } const tex{insn}; - - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); -} - -} // namespace Shader::Maxwell -- cgit v1.2.3 From fa2f6e38f4d465ba6e5efe6c6bd23d8ef39b080d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 16 Mar 2021 00:57:07 -0400 Subject: shader: Implement FSET and FSETP Also fix oversight with adding SignedZeroInfNanPreserve execution mode. 
--- .../maxwell/translate/impl/common_funcs.cpp | 48 +++++++++++++++ .../frontend/maxwell/translate/impl/common_funcs.h | 6 ++ .../translate/impl/floating_point_compare.cpp | 68 ---------------------- .../impl/floating_point_compare_and_set.cpp | 65 +++++++++++++++++++++ .../impl/floating_point_set_predicate.cpp | 60 +++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.h | 19 ++++++ .../maxwell/translate/impl/not_implemented.cpp | 24 -------- 7 files changed, 198 insertions(+), 92 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 9d4ac2e36..af9a8f82c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -58,4 +58,52 @@ IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp } } +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case 
FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index c9ae5c500..f8add3c34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -15,4 +15,10 @@ namespace Shader::Maxwell { const IR::U1& predicate_2, BooleanOp bop); [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, + const IR::F32& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index f254ecb3a..e78e9c4e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -9,74 +9,6 @@ namespace Shader::Maxwell { namespace 
{ -enum class FPCompareOp : u64 { - F, - LT, - EQ, - LE, - GT, - NE, - GE, - NUM, - Nan, - LTU, - EQU, - LEU, - GTU, - NEU, - GEU, - T, -}; - -bool IsCompareOpOrdered(FPCompareOp op) { - switch (op) { - case FPCompareOp::LTU: - case FPCompareOp::EQU: - case FPCompareOp::LEU: - case FPCompareOp::GTU: - case FPCompareOp::NEU: - case FPCompareOp::GEU: - return false; - default: - return true; - } -} - -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, - FPCompareOp compare_op, IR::FpControl control) { - const bool ordered{IsCompareOpOrdered(compare_op)}; - switch (compare_op) { - case FPCompareOp::F: - return ir.Imm1(false); - case FPCompareOp::LT: - case FPCompareOp::LTU: - return ir.FPLessThan(operand_1, operand_2, control, ordered); - case FPCompareOp::EQ: - case FPCompareOp::EQU: - return ir.FPEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::LE: - case FPCompareOp::LEU: - return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GT: - case FPCompareOp::GTU: - return ir.FPGreaterThan(operand_1, operand_2, control, ordered); - case FPCompareOp::NE: - case FPCompareOp::NEU: - return ir.FPNotEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GE: - case FPCompareOp::GEU: - return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::NUM: - return ir.FPOrdered(operand_1, operand_2); - case FPCompareOp::Nan: - return ir.FPUnordered(operand_1, operand_2); - case FPCompareOp::T: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { union { u64 insn; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..c5417775e 
--- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,65 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? 
one_mask : fp_one}; + + v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..8ff9db843 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + 
.rounding{IR::FpRounding::DontCare}, + .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 45d6f5e06..761b64666 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -35,6 +35,25 @@ enum class PredicateOp : u64 { NonZero, }; +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9aa7b836c..b31928370 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,30 +201,6 @@ void 
TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSET_reg(u64) { - ThrowNotImplemented(Opcode::FSET_reg); -} - -void TranslatorVisitor::FSET_cbuf(u64) { - ThrowNotImplemented(Opcode::FSET_cbuf); -} - -void TranslatorVisitor::FSET_imm(u64) { - ThrowNotImplemented(Opcode::FSET_imm); -} - -void TranslatorVisitor::FSETP_reg(u64) { - ThrowNotImplemented(Opcode::FSETP_reg); -} - -void TranslatorVisitor::FSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::FSETP_cbuf); -} - -void TranslatorVisitor::FSETP_imm(u64) { - ThrowNotImplemented(Opcode::FSETP_imm); -} - void TranslatorVisitor::FSWZADD(u64) { ThrowNotImplemented(Opcode::FSWZADD); } -- cgit v1.2.3 From 3b7fd3ad0fcb0419c455c16127f43d01b6dc7fc9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 17 Mar 2021 00:53:53 -0400 Subject: shader: Implement CSET and CSETP --- .../maxwell/translate/impl/condition_code_set.cpp | 54 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 8 ---- 2 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..ea0c40a54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::CSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + } const cset{insn}; + + const IR::U32 one_mask{ir.Imm32(-1)}; + const IR::U32 fp_one{ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{ir.Imm32(0)}; + const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; + const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; + const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; + const IR::U32 result{ir.Select(pred_result, pass_result, fail_result)}; + X(cset.dest_reg, result); +} + +void TranslatorVisitor::CSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, BooleanOp> bop; + } const csetp{insn}; + + const BooleanOp bop{csetp.bop}; + const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; + const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; + ir.SetPred(csetp.dest_pred_a, result_a); + ir.SetPred(csetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b31928370..0325f14ea 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -85,14 +85,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::CSET(u64) { - ThrowNotImplemented(Opcode::CSET); -} - -void TranslatorVisitor::CSETP(u64) { - ThrowNotImplemented(Opcode::CSETP); -} - void TranslatorVisitor::DADD_reg(u64) { ThrowNotImplemented(Opcode::DADD_reg); } -- cgit v1.2.3 From 72990df7bad1c81d6ebc51179d34e1bfc71e0caf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 18 Mar 2021 02:53:57 -0400 Subject: shader: Implement DADD --- .../frontend/maxwell/translate/impl/double_add.cpp | 67 ++++++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 52 ++++++++++++++++- .../frontend/maxwell/translate/impl/impl.h | 3 + .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 4 files changed, 120 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..bece191d7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + } const dadd{insn}; + + if (!IR::IsAligned(dadd.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value()); + } + if (!IR::IsAligned(dadd.src_a_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value()); + } + if (dadd.cc != 0) { + throw NotImplementedException("DADD CC"); + } + + const IR::Reg reg_a{dadd.src_a_reg}; + const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))}; + const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; + + IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dadd.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)}; + const IR::Value result{v.ir.UnpackDouble2x32(value)}; + + for (int i = 0; i < 2; i++) { + v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::DADD_reg(u64 insn) { + DADD(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DADD_cbuf(u64 insn) { + DADD(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DADD_imm(u64 insn) { + DADD(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff 
--git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7564aeeb2..e444dcd4f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -7,6 +7,15 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { +namespace { +[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, + u32 offset) { + if (unaligned) { + return ir.Imm32(0); + } + return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); +} +} // Anonymous namespace IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); @@ -56,6 +65,18 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } +IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> src; + } const index{insn}; + const IR::Reg reg{index.src}; + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1))); +} + static std::pair CbufAddr(u64 insn) { union { u64 raw; @@ -75,15 +96,31 @@ static std::pair CbufAddr(u64 insn) { } IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { - const auto[binding, byte_offset]{CbufAddr(insn)}; + const auto [binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { - const auto[binding, byte_offset]{CbufAddr(insn)}; + const auto [binding, byte_offset]{CbufAddr(insn)}; return ir.GetFloatCbuf(binding, byte_offset); } +IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + const auto [binding, offset_value]{CbufAddr(insn)}; + const bool unaligned{cbuf.unaligned != 0}; + const u32 offset{offset_value.U32()}; + const 
IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + + const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; + const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; + return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -110,6 +147,17 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { return ir.Imm32(Common::BitCast(value | sign_bit)); } +IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; + const u64 value{imm.value << 44}; + return ir.Imm64(Common::BitCast(value | sign_bit)); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 761b64666..e3e298c3b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -351,12 +351,15 @@ public: [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); + [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0325f14ea..9675cef54 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -85,18 +85,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::DADD_reg(u64) { - ThrowNotImplemented(Opcode::DADD_reg); -} - -void TranslatorVisitor::DADD_cbuf(u64) { - ThrowNotImplemented(Opcode::DADD_cbuf); -} - -void TranslatorVisitor::DADD_imm(u64) { - ThrowNotImplemented(Opcode::DADD_imm); -} - void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- .../frontend/maxwell/control_flow.cpp | 31 ++++++-- .../frontend/maxwell/control_flow.h | 3 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 18 +++++ .../frontend/maxwell/translate/impl/exit.cpp | 15 ---- .../maxwell/translate/impl/exit_program.cpp | 43 +++++++++++ .../frontend/maxwell/translate/impl/impl.h | 4 +- .../translate/impl/load_store_attribute.cpp | 86 +++++++++++++++++++++- .../maxwell/translate/impl/not_implemented.cpp | 16 +--- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- 11 files changed, 178 insertions(+), 43 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 715c0e92d..4f6707fae 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) { 
case Opcode::EXIT: case Opcode::JMP: case Opcode::JMX: + case Opcode::KIL: case Opcode::BRK: case Opcode::CONT: case Opcode::LONGJMP: @@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati block->end = pc; return AnalysisState::Branch; } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } case Opcode::PBK: case Opcode::PCNT: case Opcode::PEXIT: @@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond, - bool visit_conditional_inst) { + EndClass insn_end_class, IR::Condition cond) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = visit_conditional_inst ? 
(pc + 1) : pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block block->branch_false = endif_block; - // And branch to it from the conditional instruction if it is a branch - if (insn_end_class == EndClass::Branch) { + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. + if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { conditional_block->cond = IR::Condition{true}; conditional_block->branch_true = endif_block; conditional_block->branch_false = nullptr; @@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { @@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } - block->end = pc; + block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; } @@ -505,6 +514,12 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; } } if (function.entrypoint == 8) { diff --git 
a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index fe74f210f..22f134194 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -29,6 +29,7 @@ enum class EndClass { Call, Exit, Return, + Kill, }; enum class Token { @@ -130,7 +131,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond, bool visit_conditional_inst); + IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8bfa64326..0074eb89b 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolid, DumpExpr(stmt->op)); break; @@ -424,6 +430,9 @@ private: gotos.push_back(root.insert(ip, *goto_stmt)); break; } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{})); + break; } } } @@ -729,6 +738,15 @@ private: current_block = nullptr; break; } + case StatementType::Kill: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp deleted file mode 100644 index e98bbd0d1..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp 
+++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { - -void TranslatorVisitor::EXIT(u64) { - ir.Exit(); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..ea9b33da9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + throw NotImplementedException("Sample mask"); + } + if (sph.ps.omap.depth != 0) { + throw NotImplementedException("Fragment depth"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h 
b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index e3e298c3b..ed81d9c36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -108,7 +108,7 @@ public: void DSETP_reg(u64 insn); void DSETP_cbuf(u64 insn); void DSETP_imm(u64 insn); - void EXIT(u64 insn); + void EXIT(); void F2F_reg(u64 insn); void F2F_cbuf(u64 insn); void F2F_imm(u64 insn); @@ -220,7 +220,7 @@ public: void JCAL(u64 insn); void JMP(u64 insn); void JMX(u64 insn); - void KIL(u64 insn); + void KIL(); void LD(u64 insn); void LDC(u64 insn); void LDG(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4..2922145ee 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -11,6 +11,13 @@ namespace Shader::Maxwell { namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + enum class InterpolationMode : u64 { Pass, Multiply, @@ -23,8 +30,85 @@ enum class SampleMode : u64 { Centroid, Offset, }; + +int NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} } // Anonymous namespace +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> stream_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + if (ald.o != 0) { + throw NotImplementedException("O"); + } + if (ald.patch != 0) { + throw NotImplementedException("P"); + } + 
if (ald.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed"); + } + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ald.size)}; + for (int element = 0; element < num_elements; ++element) { + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + } +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> stream_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.patch != 0) { + throw NotImplementedException("P"); + } + if (ast.stream_reg != IR::Reg::RZ) { + throw NotImplementedException("Stream store"); + } + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ast.size)}; + for (int element = 0; element < num_elements; ++element) { + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + } +} + void TranslatorVisitor::IPA(u64 insn) { // IPA is the instruction used to read varyings from a fragment shader. // gl_FragCoord is mapped to the gl_Position attribute. 
@@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { // } const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; if (is_indexed) { - throw NotImplementedException("IPA.IDX"); + throw NotImplementedException("IDX"); } const IR::Attribute attribute{ipa.attribute}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef54..59252bcc5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { ThrowNotImplemented(Opcode::AL2P); } -void TranslatorVisitor::ALD(u64) { - ThrowNotImplemented(Opcode::ALD); -} - -void TranslatorVisitor::AST(u64) { - ThrowNotImplemented(Opcode::AST); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::EXIT(u64) { - throw LogicError("Visting EXIT instruction"); -} - void TranslatorVisitor::F2F_reg(u64) { ThrowNotImplemented(Opcode::F2F_reg); } @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { ThrowNotImplemented(Opcode::JMX); } -void TranslatorVisitor::KIL(u64) { - ThrowNotImplemented(Opcode::KIL); +void TranslatorVisitor::KIL() { + // KIL is a no-op } void TranslatorVisitor::LD(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c64..0fbb87ec4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const tex{insn}; - Impl(*this, insn, tex.aoffi != 0, 
tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset * 4)); } void TranslatorVisitor::TEX_b(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b00..54f0df754 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::Value Sample(TranslatorVisitor& v, u64 insn) { const Encoding texs{insn}; - const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset * 4))}; const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::Reg reg_a{texs.src_reg_a}; const IR::Reg reg_b{texs.src_reg_b}; -- cgit v1.2.3 From eeb1efa2d2947faed55340e8aec3a0187a3729a1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 20:28:26 -0300 Subject: shader: Implement LOP32I --- .../maxwell/translate/impl/logic_operation.cpp | 59 +++++++++++++++++----- .../maxwell/translate/impl/not_implemented.cpp | 4 -- 2 files changed, 45 insertions(+), 18 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp index e786a388e..89e5cd6de 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -32,34 +32,51 @@ enum class LogicalOp : u64 { } } -void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, + LogicalOp bit_op, 
std::optional pred_op = std::nullopt, + IR::Pred dest_pred = IR::Pred::PT) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; - BitField<39, 1, u64> neg_a; - BitField<40, 1, u64> neg_b; - BitField<41, 2, LogicalOp> bit_op; - BitField<43, 1, u64> x; - BitField<44, 2, PredicateOp> pred_op; - BitField<48, 3, IR::Pred> pred; } const lop{insn}; - if (lop.x != 0) { - throw NotImplementedException("LOP X"); + if (x) { + throw NotImplementedException("X"); + } + if (cc) { + throw NotImplementedException("CC"); } IR::U32 op_a{v.X(lop.src_reg)}; - if (lop.neg_a != 0) { + if (inv_a != 0) { op_a = v.ir.BitwiseNot(op_a); } - if (lop.neg_b != 0) { + if (inv_b != 0) { op_b = v.ir.BitwiseNot(op_b); } - const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)}; - const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)}; + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; + if (pred_op) { + const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; + v.ir.SetPred(dest_pred, pred_result); + } v.X(lop.dest_reg, result); - v.ir.SetPred(lop.pred, pred_result); +} + +void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<39, 1, u64> inv_a; + BitField<40, 1, u64> inv_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> dest_pred; + } const lop{insn}; + + LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, + lop.pred_op, lop.dest_pred); } } // Anonymous namespace @@ -74,4 +91,18 @@ void TranslatorVisitor::LOP_cbuf(u64 insn) { void TranslatorVisitor::LOP_imm(u64 insn) { LOP(*this, insn, GetImm20(insn)); } + +void TranslatorVisitor::LOP32I(u64 insn) { + union { + u64 raw; + BitField<53, 2, LogicalOp> bit_op; + BitField<57, 1, u64> x; + BitField<52, 1, u64> cc; + BitField<55, 1, u64> inv_a; + BitField<56, 1, u64> inv_b; + 
} const lop32i{insn}; + + LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, + lop32i.inv_b != 0, lop32i.bit_op); +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 59252bcc5..a4367fc5a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -357,10 +357,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::LOP32I(u64) { - ThrowNotImplemented(Opcode::LOP32I); -} - void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -- cgit v1.2.3 From c97d03efb9e02f89cca6dfea4c8d5c37fc4a2adc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 21:14:58 -0300 Subject: shader: Implement ISCADD (imm) --- .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index f06046d4d..5469e445a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -57,8 +57,8 @@ void TranslatorVisitor::ISCADD_cbuf(u64) { throw NotImplementedException("ISCADD (cbuf)"); } -void TranslatorVisitor::ISCADD_imm(u64) { - throw NotImplementedException("ISCADD (imm)"); +void TranslatorVisitor::ISCADD_imm(u64 insn) { + ISCADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::ISCADD32I(u64) { -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 
Subject: shader: Implement I2F --- .../frontend/maxwell/translate/impl/impl.cpp | 21 +++ .../frontend/maxwell/translate/impl/impl.h | 2 + .../impl/integer_floating_point_conversion.cpp | 173 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- 6 files changed, 198 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index e444dcd4f..c9af83010 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); } +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { return ir.Imm64(Common::BitCast(value | sign_bit)); } +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast(static_cast(value) << 32)); +} + IR::U32 
TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ed81d9c36..cb66cca25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -356,10 +356,12 @@ public: [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..e8b5ae1d2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,173 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if 
(i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw 
NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4367fc5a..4078feafa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) { ThrowNotImplemented(Opcode::HSETP2_imm); } -void TranslatorVisitor::I2F_reg(u64) { - ThrowNotImplemented(Opcode::I2F_reg); -} - -void TranslatorVisitor::I2F_cbuf(u64) { - ThrowNotImplemented(Opcode::I2F_cbuf); -} - -void TranslatorVisitor::I2F_imm(u64) { - ThrowNotImplemented(Opcode::I2F_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 0fbb87ec4..b691b4d1f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -56,7 +56,7 @@ 
Shader::TextureType GetType(TextureType type, bool dc) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 54f0df754..d5fda20f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) { } IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { - return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); } IR::Value Sample(TranslatorVisitor& v, u64 insn) { -- cgit v1.2.3 From e802512d8e49cc4a92c0c09fe023576c2a2ab3db Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 20 Mar 2021 21:22:21 +0100 Subject: shader: Refactor half floating instructions --- .../translate/impl/half_floating_point_add.cpp | 60 +--------------------- .../translate/impl/half_floating_point_helper.cpp | 49 ++++++++++++++++++ .../translate/impl/half_floating_point_helper.h | 31 +++++++++++ 3 files changed, 82 insertions(+), 58 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index c292d5e87..19e3401ca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -2,66 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" namespace Shader::Maxwell { namespace { -enum class Merge : u64 { - H1_H0, - F32, - MRG_H0, - MRG_H1, -}; - -enum class Swizzle : u64 { - H1_H0, - F32, - H0_H0, - H1_H1, -}; - -std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { - switch (swizzle) { - case Swizzle::H1_H0: { - const IR::Value vector{ir.UnpackFloat2x16(value)}; - return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; - } - case Swizzle::H0_H0: { - const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; - return {scalar, scalar}; - } - case Swizzle::H1_H1: { - const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; - return {scalar, scalar}; - } - case Swizzle::F32: { - const IR::F32 scalar{ir.BitCast(value)}; - return {scalar, scalar}; - } - } - throw InvalidArgument("Invalid swizzle {}", swizzle); -} - -IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, - Merge merge) { - switch (merge) { - case Merge::H1_H0: - return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); - case Merge::F32: - return ir.BitCast(ir.FPConvert(32, lhs)); - case Merge::MRG_H0: - case Merge::MRG_H1: { - const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; - const bool h0{merge == 
Merge::MRG_H0}; - const IR::F16& insert{h0 ? lhs : rhs}; - return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); - } - } - throw InvalidArgument("Invalid merge {}", merge); -} void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { @@ -122,7 +66,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // Anonymous namespace +} // namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..930822092 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -0,0 +1,49 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool h0{merge == Merge::MRG_H0}; + const IR::F16& insert{h0 ? lhs : rhs}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..0933b595e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -0,0 +1,31 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge); + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 28dff6a6298b714019aa10a47f5a9e3f3f689067 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 01:03:21 +0100 Subject: shader: Implement HFMA2 --- .../half_floating_point_fused_multiply_add.cpp | 170 +++++++++++++++++++++ .../translate/impl/half_floating_point_helper.cpp | 13 ++ .../translate/impl/half_floating_point_helper.h | 8 + .../maxwell/translate/impl/not_implemented.cpp | 20 --- 4 files changed, 191 insertions(+), 20 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..2f3996274 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -0,0 +1,170 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { + +void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, + Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, + bool sat, HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hfma2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; + const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + if (lhs_c.Type() == IR::Type::F16) { + lhs_c = v.ir.FPConvert(32, lhs_c); + rhs_c = v.ir.FPConvert(32, rhs_c); + } + } + + lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); + + lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); + rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{HalfPrecision2FmzMode(precision)}, + }; + IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); +} + +void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, + Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, + HalfPrecision precision) { + union { + u64 raw; + BitField<47, 2, Swizzle> swizzle_a; + BitField<49, 2, Merge> merge; + } const hfma2{insn}; + + HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, + sat, precision); +} + +} // namespace + +void TranslatorVisitor::HFMA2_reg(u64 insn) { + union { + u64 raw; + BitField<28, 2, Swizzle> swizzle_b; + BitField<32, 1, u64> saturate; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> neg_c; + BitField<35, 2, Swizzle> swizzle_c; + BitField<37, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, + GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_rc(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_b; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } 
const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, + GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_cr(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, + GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_imm(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 
1 : 0) << 31)}; + + HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), + GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; + BitField<20, 32, u64> imm32; + BitField<52, 1, u64> neg_c; + BitField<53, 2, Swizzle> swizzle_a; + BitField<55, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast(hfma2.imm32)}; + HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, + Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp index 930822092..d0c6ba1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -6,6 +6,19 @@ namespace Shader::Maxwell { +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { + switch (precision) { + case HalfPrecision::None: + return IR::FmzMode::None; + case HalfPrecision::FTZ: + return IR::FmzMode::FTZ; + case HalfPrecision::FMZ: + return IR::FmzMode::FMZ; + default: + return IR::FmzMode::DontCare; + } +} + std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { switch (swizzle) { case Swizzle::H1_H0: { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index 0933b595e..f26ef0949 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -23,6 +23,14 @@ enum class Swizzle : u64 { H1_H1, }; 
+enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); + std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4078feafa..ddfca8d7a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,26 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HFMA2_reg(u64) { - ThrowNotImplemented(Opcode::HFMA2_reg); -} - -void TranslatorVisitor::HFMA2_rc(u64) { - ThrowNotImplemented(Opcode::HFMA2_rc); -} - -void TranslatorVisitor::HFMA2_cr(u64) { - ThrowNotImplemented(Opcode::HFMA2_cr); -} - -void TranslatorVisitor::HFMA2_imm(u64) { - ThrowNotImplemented(Opcode::HFMA2_imm); -} - -void TranslatorVisitor::HFMA2_32I(u64) { - ThrowNotImplemented(Opcode::HFMA2_32I); -} - void TranslatorVisitor::HMUL2_reg(u64) { ThrowNotImplemented(Opcode::HMUL2_reg); } -- cgit v1.2.3 From ed6cd3c94ac10b434a1240fc3cbed2050766be65 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 02:08:04 +0100 Subject: shader: Implement HMUL2 --- .../impl/half_floating_point_multiply.cpp | 143 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 --- 2 files changed, 143 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new 
file mode 100644 index 000000000..ff34a8c8f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { + +void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, + HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hmul2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{HalfPrecision2FmzMode(precision)}, + }; + IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); +} + +void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, + Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<47, 2, Swizzle> swizzle_a; + BitField<39, 2, HalfPrecision> precision; + } const hmul2{insn}; + + HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, + hmul2.precision); +} +} // namespace + +void TranslatorVisitor::HMUL2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<44, 1, u64> abs_a; + BitField<28, 2, Swizzle> swizzle_b; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, + hmul2.swizzle_b, GetReg20(insn)); +} + +void TranslatorVisitor::HMUL2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<54, 1, u64> abs_b; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, + Swizzle::F32, 
GetCbuf(insn)); +} + +void TranslatorVisitor::HMUL2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, + Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HMUL2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 2, HalfPrecision> precision; + BitField<52, 1, u64> sat; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.imm32)}; + HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, + Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ddfca8d7a..6c159301f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,22 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HMUL2_reg(u64) { - ThrowNotImplemented(Opcode::HMUL2_reg); -} - -void TranslatorVisitor::HMUL2_cbuf(u64) { - ThrowNotImplemented(Opcode::HMUL2_cbuf); -} - -void TranslatorVisitor::HMUL2_imm(u64) { - ThrowNotImplemented(Opcode::HMUL2_imm); -} - -void TranslatorVisitor::HMUL2_32I(u64) { - ThrowNotImplemented(Opcode::HMUL2_32I); -} - void TranslatorVisitor::HSET2_reg(u64) { ThrowNotImplemented(Opcode::HSET2_reg); } -- cgit v1.2.3 From 
9e213fd861d264cf79d7a6ed0268a57c87306b9b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 03:48:40 +0100 Subject: shader: Implement HSET2 --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/half_floating_point_helper.h | 1 + .../translate/impl/half_floating_point_set.cpp | 115 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 4 files changed, 118 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index f2a2ff331..1b87d04fc 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -105,8 +105,8 @@ INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") -INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 1100 1--- ----") -INST(HSET2_imm, "HSET2 (imm)", "0111 1100 0--- ----") +INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") +INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index f26ef0949..24063b2ab 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -5,6 +5,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include 
"shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..4825ca06a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -0,0 +1,115 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, + bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hset2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + // TODO: Implement FP16 FloatingPointCompare + //if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + //} + + lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = 
v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(hset2.pred)}; + if (hset2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; + + const u32 true_value = bf ? 0x3c00 : 0xffff; + const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; + const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; + const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; + + v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); +} +} // Anonymous namespace + +void TranslatorVisitor::HSET2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> bf; + BitField<31, 1, u64> neg_b; + BitField<50, 1, u64> ftz; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hset2{insn}; + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, + hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); +} + +void TranslatorVisitor::HSET2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + } const hset2{insn}; + + HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, + hset2.compare_op, Swizzle::F32); +} + +void TranslatorVisitor::HSET2_imm(u64 insn) { + union { + 
u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, + hset2.compare_op, Swizzle::H1_H0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6c159301f..d1aeceef1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,18 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HSET2_reg(u64) { - ThrowNotImplemented(Opcode::HSET2_reg); -} - -void TranslatorVisitor::HSET2_cbuf(u64) { - ThrowNotImplemented(Opcode::HSET2_cbuf); -} - -void TranslatorVisitor::HSET2_imm(u64) { - ThrowNotImplemented(Opcode::HSET2_imm); -} - void TranslatorVisitor::HSETP2_reg(u64) { ThrowNotImplemented(Opcode::HSETP2_reg); } -- cgit v1.2.3 From e10d9c1b8e21912d34c02a22b5812b94fc27502b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:24:30 +0100 Subject: shader: Implement HSETP2 --- .../impl/half_floating_point_set_predicate.cpp | 116 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 2 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..6b1ac21d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, + Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { + union { + u64 insn; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<6, 1, u64> ftz; + BitField<47, 2, Swizzle> swizzle_a; + } const hsetp2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + // TODO: Implement FP16 FloatingPointCompare + // if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + //} + + lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{hsetp2.ftz != 0 
? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; + if (hsetp2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; + + if (h_and) { + auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); + v.ir.SetPred(hsetp2.dest_pred_a, result); + v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); + } else { + v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); + v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); + } +} +} // Anonymous namespace + +void TranslatorVisitor::HSETP2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> h_and; + BitField<31, 1, u64> neg_b; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hsetp2{insn}; + HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> abs_b; + BitField<56, 1, u64> neg_b; + BitField<49, 4, FPCompareOp> compare_op; + } const hsetp2{insn}; + + HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hsetp2{insn}; + + const u32 imm{static_cast(hsetp2.low << 6) | 
((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + + HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, + hsetp2.h_and != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d1aeceef1..bd3c1f9d6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,18 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HSETP2_reg(u64) { - ThrowNotImplemented(Opcode::HSETP2_reg); -} - -void TranslatorVisitor::HSETP2_cbuf(u64) { - ThrowNotImplemented(Opcode::HSETP2_cbuf); -} - -void TranslatorVisitor::HSETP2_imm(u64) { - ThrowNotImplemented(Opcode::HSETP2_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From 27fb97377eeb40849260ea866a90519521c6f59b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:33:19 +0100 Subject: shader: Fix floating point comparison for FP16 --- .../maxwell/translate/impl/common_funcs.cpp | 2 +- .../frontend/maxwell/translate/impl/common_funcs.h | 4 +-- .../translate/impl/half_floating_point_set.cpp | 38 +++++++++++----------- .../impl/half_floating_point_set_predicate.cpp | 20 ++++++------ 4 files changed, 32 insertions(+), 32 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index af9a8f82c..d30e82b10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,7 +72,7 @@ bool 
IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f8add3c34..f584060b3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -18,7 +18,7 @@ namespace Shader::Maxwell { [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); -[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, - const IR::F32& operand_2, FPCompareOp compare_op, +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 4825ca06a..1d28c0531 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -22,8 +22,8 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - //if (lhs_a.Type() != lhs_b.Type()) { + + if (lhs_a.Type() != lhs_b.Type()) { if (lhs_a.Type() == IR::Type::F16) { lhs_a = v.ir.FPConvert(32, lhs_a); rhs_a = 
v.ir.FPConvert(32, rhs_a); @@ -32,7 +32,7 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f lhs_b = v.ir.FPConvert(32, lhs_b); rhs_b = v.ir.FPConvert(32, rhs_b); } - //} + } lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); @@ -94,22 +94,22 @@ void TranslatorVisitor::HSET2_cbuf(u64 insn) { } void TranslatorVisitor::HSET2_imm(u64 insn) { - union { - u64 insn; - BitField<53, 1, u64> bf; - BitField<54, 1, u64> ftz; - BitField<49, 4, FPCompareOp> compare_op; - BitField<56, 1, u64> neg_high; - BitField<30, 9, u64> high; - BitField<29, 1, u64> neg_low; - BitField<20, 9, u64> low; - } const hset2{insn}; - - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; - - HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, - hset2.compare_op, Swizzle::H1_H0); + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 
1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 6b1ac21d5..3e2a23c92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -24,17 +24,17 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - // if (lhs_a.Type() != lhs_b.Type()) { - if (lhs_a.Type() == IR::Type::F16) { - lhs_a = v.ir.FPConvert(32, lhs_a); - rhs_a = v.ir.FPConvert(32, rhs_a); - } - if (lhs_b.Type() == IR::Type::F16) { - lhs_b = v.ir.FPConvert(32, lhs_b); - rhs_b = v.ir.FPConvert(32, rhs_b); + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } } - //} lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); -- cgit v1.2.3 From a77e764726938a26803fa90a9c69ccdd32ab09cd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 00:42:56 -0300 Subject: shader: Add support for fp16 comparisons and misc fixes --- .../frontend/maxwell/translate/impl/half_floating_point_add.cpp | 3 +-- .../maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | 4 +--- .../frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | 
3 +-- .../frontend/maxwell/translate/impl/half_floating_point_set.cpp | 1 + 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { union { @@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, HalfPrecision precision) { @@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz HFMA2(v, insn, 
hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, sat, precision); } - -} // namespace +} // Anonymous namespace void TranslatorVisitor::HFMA2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, HalfPrecision precision) { @@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, hmul2.precision); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HMUL2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) { BitField<35, 4, FPCompareOp> compare_op; BitField<28, 2, Swizzle> swizzle_b; } const hset2{insn}; + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); } -- cgit v1.2.3 From 112b8f00f0da0e031bb62a7a7a44469d3a5518a6 Mon Sep 17 00:00:00 2001 From: ameerj 
<52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 01:32:02 -0400 Subject: shader: Add FP64 register load/store helpers --- .../frontend/maxwell/translate/impl/double_add.cpp | 16 ++----------- .../frontend/maxwell/translate/impl/impl.cpp | 27 ++++++++++++++++------ .../frontend/maxwell/translate/impl/impl.h | 2 ++ 3 files changed, 24 insertions(+), 21 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index bece191d7..3db09d0c2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -22,19 +22,11 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<48, 1, u64> neg_a; BitField<49, 1, u64> abs_b; } const dadd{insn}; - - if (!IR::IsAligned(dadd.dest_reg, 2)) { - throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value()); - } - if (!IR::IsAligned(dadd.src_a_reg, 2)) { - throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value()); - } if (dadd.cc != 0) { throw NotImplementedException("DADD CC"); } - const IR::Reg reg_a{dadd.src_a_reg}; - const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))}; + const IR::F64 src_a{v.D(dadd.src_a_reg)}; const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; @@ -43,12 +35,8 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { .rounding{CastFpRounding(dadd.fp_rounding)}, .fmz_mode{IR::FmzMode::None}, }; - const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)}; - const IR::Value result{v.ir.UnpackDouble2x32(value)}; - for (int i = 0; i < 2; i++) { - v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)}); - } + 
v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index c9af83010..2d2f6f9c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -25,6 +25,13 @@ IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast(X(reg)); } +IR::F64 TranslatorVisitor::D(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } @@ -33,6 +40,16 @@ void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast(value)); } +void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + IR::U32 TranslatorVisitor::GetReg8(u64 insn) { union { u64 raw; @@ -68,13 +85,9 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { union { u64 raw; - BitField<20, 8, IR::Reg> src; - } const index{insn}; - const IR::Reg reg{index.src}; - if (!IR::IsAligned(reg, 2)) { - throw NotImplementedException("Unaligned source register {}", reg); - } - return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1))); + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); } static std::pair CbufAddr(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h 
b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index cb66cca25..1a1073fa7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -342,9 +342,11 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); + [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + void D(IR::Reg dest_reg, const IR::F64& value); [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); -- cgit v1.2.3 From c858b8ba97d3ff79dcff0795c1184ee356f2cd1a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 02:09:14 -0400 Subject: shader: Implement DMUL and DFMA Also add a missing const on DADD --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 2 +- .../frontend/maxwell/translate/impl/double_add.cpp | 2 +- .../translate/impl/double_fused_multiply_add.cpp | 53 ++++++++++++++++++++++ .../maxwell/translate/impl/double_multiply.cpp | 45 ++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 8 ++++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/not_implemented.cpp | 28 ------------ 7 files changed, 109 insertions(+), 30 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1b87d04fc..1dfaeb92f 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -35,7 +35,7 @@ INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") INST(DFMA_reg, "DFMA 
(reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") -INST(DFMA_cr, "DFMA (cr)", "0010 1011 0111 ----") +INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index 3db09d0c2..ac1433dea 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -30,7 +30,7 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; - IR::FpControl control{ + const IR::FpControl control{ .no_contraction{true}, .rounding{CastFpRounding(dadd.fp_rounding)}, .fmz_mode{IR::FmzMode::None}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..ff7321862 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<50, 2, FpRounding> fp_rounding; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + } const dfma{insn}; + + const IR::F64 src_a{v.D(dfma.src_a_reg)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; + const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; + + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dfma.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + + v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DFMA_reg(u64 insn) { + DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_cr(u64 insn) { + DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_rc(u64 insn) { + DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DFMA_imm(u64 insn) { + DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..3e83d1c95 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<48, 1, u64> neg; + } const dmul{insn}; + + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dmul.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + + v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DMUL_reg(u64 insn) { + DMUL(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMUL_cbuf(u64 insn) { + DMUL(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMUL_imm(u64 insn) { + DMUL(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 2d2f6f9c6..758a0230a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -90,6 +90,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { return D(reg.index); } +IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + static std::pair CbufAddr(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 1a1073fa7..c994fe803 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -354,6 +354,7 @@ public: [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index bd3c1f9d6..4e069912a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,22 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DFMA_reg(u64) { - ThrowNotImplemented(Opcode::DFMA_reg); -} - -void TranslatorVisitor::DFMA_rc(u64) { - ThrowNotImplemented(Opcode::DFMA_rc); -} - -void TranslatorVisitor::DFMA_cr(u64) { - ThrowNotImplemented(Opcode::DFMA_cr); -} - -void TranslatorVisitor::DFMA_imm(u64) { - ThrowNotImplemented(Opcode::DFMA_imm); -} - void TranslatorVisitor::DMNMX_reg(u64) { ThrowNotImplemented(Opcode::DMNMX_reg); } @@ -109,18 +93,6 @@ void TranslatorVisitor::DMNMX_imm(u64) { ThrowNotImplemented(Opcode::DMNMX_imm); } -void TranslatorVisitor::DMUL_reg(u64) { - ThrowNotImplemented(Opcode::DMUL_reg); -} - -void TranslatorVisitor::DMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::DMUL_cbuf); -} - -void TranslatorVisitor::DMUL_imm(u64) { - ThrowNotImplemented(Opcode::DMUL_imm); -} - void TranslatorVisitor::DSET_reg(u64) { ThrowNotImplemented(Opcode::DSET_reg); } -- cgit v1.2.3 From a62f04efab4331eeabd4441962f86a5e87db3f2d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 09:32:16 +0100 Subject: shader: Implement F2F --- .../floating_point_conversion_floating_point.cpp | 180 +++++++++++++++++++++ 
.../maxwell/translate/impl/not_implemented.cpp | 12 -- 2 files changed, 180 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..1e366fde0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -0,0 +1,180 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class RoundingOp : u64 { + None = 0, + Pass = 3, + Round = 8, + Floor = 9, + Ceil = 10, + Trunc = 11, +}; + +[[nodiscard]] u32 WidthSize(FloatFormat width) { + switch (width) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg; + BitField<50, 1, u64> sat; + BitField<39, 4, u64> rounding_op; + BitField<39, 2, FpRounding> rounding; + BitField<10, 2, FloatFormat> src_size; + BitField<8, 2, FloatFormat> dst_size; + + [[nodiscard]] RoundingOp RoundingOperation() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast(rounding_op.Value() & 
rounding_mask); + } + } const f2f{insn}; + + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; + + const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; + IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + if (f2f.src_size != f2f.dst_size) { + fp_control.rounding = CastFpRounding(f2f.rounding); + input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); + } else { + switch (f2f.RoundingOperation()) { + case RoundingOp::None: + case RoundingOp::Pass: + // Make sure NANs are handled properly + switch (f2f.src_size) { + case FloatFormat::F16: + input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); + break; + case FloatFormat::F32: + input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); + break; + case FloatFormat::F64: + input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); + break; + } + break; + case RoundingOp::Round: + input = v.ir.FPRoundEven(input, fp_control); + break; + case RoundingOp::Floor: + input = v.ir.FPFloor(input, fp_control); + break; + case RoundingOp::Ceil: + input = v.ir.FPCeil(input, fp_control); + break; + case RoundingOp::Trunc: + input = v.ir.FPTrunc(input, fp_control); + break; + default: + throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); + } + } + if (f2f.sat != 0 && !any_fp64) { + input = v.ir.FPSaturate(input); + } + + switch (f2f.dst_size) { + case FloatFormat::F16: { + const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); + break; + } + case FloatFormat::F32: + v.F(f2f.dest_reg, input); + break; + case FloatFormat::F64: + v.D(f2f.dest_reg, input); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); + } +} +} // Anonymous namespace + +void 
TranslatorVisitor::F2F_reg(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatReg20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleReg20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_cbuf(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? 
rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatCbuf(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleCbuf(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { + throw NotImplementedException("Instruction"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4e069912a..08f6eb788 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -117,18 +117,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::F2F_reg(u64) { - ThrowNotImplemented(Opcode::F2F_reg); -} - -void TranslatorVisitor::F2F_cbuf(u64) { - ThrowNotImplemented(Opcode::F2F_cbuf); -} - -void TranslatorVisitor::F2F_imm(u64) { - ThrowNotImplemented(Opcode::F2F_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } -- cgit v1.2.3 From 56be556eee65335cdc896bb1eb47999d04850b77 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 18:23:16 +0100 Subject: shader: Implement FADD32I --- .../maxwell/translate/impl/floating_point_add.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 76a807d4e..487198aa6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -64,8 +64,21 @@ void 
TranslatorVisitor::FADD_imm(u64 insn) { FADD(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FADD32I(u64) { - throw NotImplementedException("FADD32I"); +void TranslatorVisitor::FADD32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<53, 1, u64> neg_b; + BitField<54, 1, u64> abs_a; + BitField<52, 1, u64> cc; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd32i{insn}; + + FADD(*this, insn, fadd32i.sat != 0, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, + GetFloatImm32(insn), fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, + fadd32i.neg_b != 0); } } // namespace Shader::Maxwell -- cgit v1.2.3 From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:28:37 -0400 Subject: shader: Implement DMNMX, DSET, DSETP --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/double_compare_and_set.cpp | 59 ++++++++++++++++++++++ .../maxwell/translate/impl/double_min_max.cpp | 50 ++++++++++++++++++ .../translate/impl/double_set_predicate.cpp | 54 ++++++++++++++++++++ .../translate/impl/floating_point_min_max.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 36 ------------- 6 files changed, 166 insertions(+), 39 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1dfaeb92f..c6cd2a79b 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -37,8 +37,8 @@ 
INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") -INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") -INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") +INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..e2ec852c9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? 
one_mask : fp_one}; + + v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..55a224db3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, 
GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const 
IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index c3180a9bd..343d91032 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -24,7 +24,7 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; - const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ .no_contraction{false}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 08f6eb788..27b12ff3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,42 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DMNMX_reg(u64) { - ThrowNotImplemented(Opcode::DMNMX_reg); -} - -void TranslatorVisitor::DMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::DMNMX_cbuf); -} - -void 
TranslatorVisitor::DMNMX_imm(u64) { - ThrowNotImplemented(Opcode::DMNMX_imm); -} - -void TranslatorVisitor::DSET_reg(u64) { - ThrowNotImplemented(Opcode::DSET_reg); -} - -void TranslatorVisitor::DSET_cbuf(u64) { - ThrowNotImplemented(Opcode::DSET_cbuf); -} - -void TranslatorVisitor::DSET_imm(u64) { - ThrowNotImplemented(Opcode::DSET_imm); -} - -void TranslatorVisitor::DSETP_reg(u64) { - ThrowNotImplemented(Opcode::DSETP_reg); -} - -void TranslatorVisitor::DSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::DSETP_cbuf); -} - -void TranslatorVisitor::DSETP_imm(u64) { - ThrowNotImplemented(Opcode::DSETP_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } -- cgit v1.2.3 From 96b7ced6ec32ccd3da94ebfcfe74a7568cce509f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 21:01:47 -0300 Subject: shader: Better but still partial interpolation support --- .../frontend/maxwell/translate/impl/load_store_attribute.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 2922145ee..516ffec2d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -141,14 +141,16 @@ void TranslatorVisitor::IPA(u64 insn) { const IR::Attribute attribute{ipa.attribute}; IR::F32 value{ir.GetAttribute(attribute)}; if (IR::IsGeneric(attribute)) { - // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; - const bool is_perspective{false}; + const ProgramHeader& sph{env.SPH()}; + const u32 attr_index{IR::GenericAttributeIndex(attribute)}; + const u32 element{static_cast(attribute) % 4}; + const std::array input_map{sph.ps.GenericInputMap(attr_index)}; + const 
bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; if (is_perspective) { - const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; - value = ir.FPMul(value, rcp_position_w); + const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; + value = ir.FPMul(value, position_w); } } - switch (ipa.interpolation_mode) { case InterpolationMode::Pass: break; -- cgit v1.2.3 From a8d8fd40f7d7b249c542e4694953e2da5998fbaf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 14:39:59 -0300 Subject: shader: Fix TEX mask --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b691b4d1f..d2626f3e7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -188,6 +188,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, } }()}; + IR::Reg dest_reg{tex.dest_reg}; for (int element = 0; element < 4; ++element) { if (((tex.mask >> element) & 1) == 0) { continue; @@ -198,7 +199,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, } else { value = IR::F32{v.ir.CompositeExtract(sample, element)}; } - v.F(tex.dest_reg + element, value); + v.F(dest_reg, value); + ++dest_reg; } if (tex.sparse_pred != IR::Pred::PT) { v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- .../maxwell/translate/impl/not_implemented.cpp | 4 -- 
.../frontend/maxwell/translate/impl/vote.cpp | 52 ++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 27b12ff3c..c0e36a7e2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) { ThrowNotImplemented(Opcode::VMNMX); } -void TranslatorVisitor::VOTE(u64) { - ThrowNotImplemented(Opcode::VOTE); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..a88894a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:33:45 -0300 Subject: shader: Implement NDC [-1, 1], attribute types and default varying initialization --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cec03e73e..fdac1c95a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -634,6 +634,9 @@ public: : stmt_pool{stmt_pool_}, 
inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); + + IR::IREmitter ir{*block_list.front()}; + ir.Prologue(); } private: @@ -734,7 +737,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.Return(); + IR::IREmitter ir{*current_block}; + ir.Epilogue(); + ir.Return(); current_block = nullptr; break; } -- cgit v1.2.3 From 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 00:02:30 +0100 Subject: shader: Fix F2I --- .../impl/floating_point_conversion_integer.cpp | 88 ++++++++++++++++++++-- .../frontend/maxwell/translate/impl/impl.cpp | 17 +++++ .../frontend/maxwell/translate/impl/impl.h | 2 + 3 files changed, 102 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 81175627f..7c5a72800 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include + #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" @@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) { } } +std::pair ClampBounds(DestFormat format, bool is_signed) { + if (is_signed) { + switch (format) { + case DestFormat::I16: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I32: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I64: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + default: {} + } + } else { + switch (format) { + case DestFormat::I16: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I32: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I64: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + default: {} + } + } + throw NotImplementedException("Invalid destination format {}", format); +} + IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { union { u64 raw; @@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // For example converting F32 65537.0 to U16, the expected value is 0xffff, const bool is_signed{f2i.is_signed != 0}; - const size_t bitsize{BitSize(f2i.dest_format)}; - const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; + const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); + + IR::F16F32F64 intermediate; + switch (f2i.src_format) { + case SrcFormat::F16: { + const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast(max_bound)))}; + const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast(min_bound)))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + 
case SrcFormat::F32: { + const IR::F32 max_val{v.ir.Imm32(static_cast(max_bound))}; + const IR::F32 min_val{v.ir.Imm32(static_cast(min_bound))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F64: { + const IR::F64 max_val{v.ir.Imm64(max_bound)}; + const IR::F64 min_val{v.ir.Imm64(min_bound)}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + default: // This switch is over f2i.src_format, so the error reports the source format + throw NotImplementedException("Invalid source format {}", f2i.src_format.Value()); + } + + const size_t bitsize{std::max(32, BitSize(f2i.dest_format))}; + IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; + + bool handled_special_case = false; + const bool special_nan_cases = + (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); + if (special_nan_cases) { + if (f2i.dest_format == DestFormat::I32) { + handled_special_case = true; + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; + } else if (f2i.dest_format == DestFormat::I64) { + handled_special_case = true; + result = IR::U64{ + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + } + } + if (!handled_special_case && is_signed) { + if (bitsize != 64) { + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; + } else { + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + } + } if (bitsize == 64) { - const IR::Value vector{v.ir.UnpackUint2x32(result)}; - v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); - v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + v.L(f2i.dest_reg, result); } else { v.X(f2i.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 758a0230a..9bae89c10 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast(X(reg)); } @@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast(value)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c994fe803..54c31deb4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -341,10 +341,12 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); void F(IR::Reg dest_reg, const IR::F32& value); void D(IR::Reg dest_reg, const IR::F64& value); -- cgit v1.2.3 From 83a283fa867d0a09742faff11d9115acc95ea556 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:36:59 -0300 Subject: shader: Minor style nits --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 7c5a72800..ef55b9c75 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -69,7 +69,8 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { case DestFormat::I64: return {static_cast(std::numeric_limits::max()), static_cast(std::numeric_limits::min())}; - default: {} + default: + break; } } else { switch (format) { @@ -82,7 +83,8 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { case DestFormat::I64: return {static_cast(std::numeric_limits::max()), static_cast(std::numeric_limits::min())}; - default: {} + default: + break; } } throw NotImplementedException("Invalid destination format {}", format); -- cgit v1.2.3 From d3dad6b6320f680b4e85ab991941d15cbce0e616 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 19:37:44 -0300 Subject: shader: Properly insert Prologue instruction --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index fdac1c95a..79e344986 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -635,7 +635,8 @@ public: block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); - IR::IREmitter ir{*block_list.front()}; + IR::Block& first_block{*block_list.front()}; + IR::IREmitter ir{first_block, 
first_block.begin()}; ir.Prologue(); } -- cgit v1.2.3 From 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Mar 2021 11:31:37 -0400 Subject: shader: Implement SHFL --- .../maxwell/translate/impl/integer_scaled_add.cpp | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../maxwell/translate/impl/warp_shuffle.cpp | 69 ++++++++++++++++++++++ 3 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 5469e445a..42fd42bb1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { ISCADD(*this, insn, GetReg20(insn)); } -void TranslatorVisitor::ISCADD_cbuf(u64) { - throw NotImplementedException("ISCADD (cbuf)"); +void TranslatorVisitor::ISCADD_cbuf(u64 insn) { + ISCADD(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::ISCADD_imm(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c0e36a7e2..3ccd7b925 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { ThrowNotImplemented(Opcode::SETLMEMBASE); } -void TranslatorVisitor::SHFL(u64) { - ThrowNotImplemented(Opcode::SHFL); -} - void TranslatorVisitor::SSY() { // SSY is a no-op } diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ShuffleMode : u64 { + IDX, + UP, + DOWN, + BFLY, +}; + +[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, + const IR::U32& index, const IR::U32& mask, + ShuffleMode shfl_op) { + const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; + const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; + switch (shfl_op) { + case ShuffleMode::IDX: + return ir.ShuffleIndex(value, index, clamp, seg_mask); + case ShuffleMode::UP: + return ir.ShuffleUp(value, index, clamp, seg_mask); + case ShuffleMode::DOWN: + return ir.ShuffleDown(value, index, clamp, seg_mask); + case ShuffleMode::BFLY: + return ir.ShuffleButterfly(value, index, clamp, seg_mask); + default: + throw NotImplementedException("Invalid SHFL op {}", shfl_op); + } +} + +void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<30, 2, ShuffleMode> mode; + BitField<48, 3, IR::Pred> pred; + } const shfl{insn}; + + const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; + v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); + v.X(shfl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHFL(u64 insn) { + union { + u64 insn; + BitField<20, 
5, u64> src_a_imm; + BitField<28, 1, u64> src_a_flag; + BitField<29, 1, u64> src_b_flag; + BitField<34, 13, u64> src_b_imm; + } const flags{insn}; + const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast(flags.src_a_imm)) + : GetReg20(insn)}; + const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast(flags.src_b_imm)) + : GetReg39(insn)}; + Shuffle(*this, insn, src_a, src_b); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From c7c518e280d1ac04adb08d45145690fd06ac7b18 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 23:41:55 +0100 Subject: shader: Implement TLD4 and TLD4_B --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_gather.cpp | 209 +++++++++++++++++++++ 3 files changed, 211 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c6cd2a79b..d668dc1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -254,8 +254,8 @@ INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") -INST(TLD4, "TLD4", "1100 10-- --11 1---") -INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") +INST(TLD4, "TLD4", "1100 10-- ---- ----") +INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") INST(TLDS, "TLDS", "1101 -01- ---- ----") INST(TMML, "TMML", "1101 1111 0101 1---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3ccd7b925..e59c3326e 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -349,14 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { ThrowNotImplemented(Opcode::TLD_b); } -void TranslatorVisitor::TLD4(u64) { - ThrowNotImplemented(Opcode::TLD4); -} - -void TranslatorVisitor::TLD4_b(u64) { - ThrowNotImplemented(Opcode::TLD4_b); -} - void TranslatorVisitor::TLD4S(u64) { ThrowNotImplemented(Opcode::TLD4S); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..d64865876 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -0,0 +1,209 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +enum class OffsetType : u64 { + None = 0, + AOFFI, + PTP, + Invalid, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? 
Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, 
v.ir.Imm32(8), v.ir.Imm32(6), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { + const IR::U32 value1{v.X(reg++)}; + const IR::U32 value2{v.X(reg++)}; + const auto getVector = ([&v](const IR::U32& value) { + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); + }); + return {getVector(value1), getVector(value2)}; +} + +void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, + bool is_bindless) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld4{insn}; + + const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; + + IR::Reg meta_reg{tld4.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::Value offset2; + IR::F32 dref; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tld4.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + switch (offset_type) { + case OffsetType::None: + break; + case OffsetType::AOFFI: { + offset = 
MakeOffset(v, meta_reg, tld4.type); + break; + } + case OffsetType::PTP: { + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + break; + } + default: + throw NotImplementedException("Invalid offset type {}", offset_type); + } + if (tld4.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.gather_component.Assign(static_cast(component_type)); + const IR::Value sample{[&]() -> IR::Value { + if (tld4.dc == 0) { + return v.ir.ImageGather(handle, coords, offset, offset2, info); + } + return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); + }()}; + + IR::Reg dest_reg{tld4.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld4.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld4.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4(u64 insn) { + union { + u64 raw; + BitField<56, 2, ComponentType> component; + BitField<54, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, false); +} + +void TranslatorVisitor::TLD4_b(u64 insn) { + union { + u64 raw; + BitField<38, 2, ComponentType> component; + BitField<36, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From fda0835300a7ef6112791ae503435c81ffe883f5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 25 Mar 2021 19:59:35 +0100 Subject: shader: Implement TLD4S. 
--- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/texture_gather_swizzled.cpp | 133 +++++++++++++++++++++ 2 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e59c3326e..788765c21 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -349,10 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { ThrowNotImplemented(Opcode::TLD_b); } -void TranslatorVisitor::TLD4S(u64) { - ThrowNotImplemented(Opcode::TLD4S); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..beab515ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -0,0 +1,133 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast(tld4s.component_type.Value())); + info.type.Assign(tld4s.dc != 0 ? 
Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const Encoding tld4s{insn}; + v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); 
+} +} // Anonymous namespace + +void TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From f5672777c8af4700c9e0fc32af52cb2563f564f4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 25 Mar 2021 20:27:09 +0100 Subject: shader: Implement FragDepth --- src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index ea9b33da9..58a53c0ec 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -25,7 +25,7 @@ void ExitFragment(TranslatorVisitor& v) { throw NotImplementedException("Sample mask"); } if (sph.ps.omap.depth != 0) { - throw NotImplementedException("Fragment depth"); + v.ir.SetFragDepth(v.F(src_reg + 1)); } } } // Anonymous namespace -- cgit v1.2.3 From 981eb6f43bb88f1e57b4c657bf37cb7471a113e3 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 01:54:03 +0100 Subject: shader: Fix Array Indices in TEX/TLD4 --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 6 +++--- .../frontend/maxwell/translate/impl/texture_gather.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index d2626f3e7..1f1689c43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -61,11 +61,11 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -73,7 +73,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); } throw NotImplementedException("Invalid texture type {}", type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index d64865876..8c6384040 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -65,11 +65,11 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return 
v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -77,7 +77,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); } throw NotImplementedException("Invalid texture type {}", type); } -- cgit v1.2.3 From 742d11c2ad948c8630be15901514ec9e5e5fcd20 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 16:02:04 +0100 Subject: shader: Implement TLD4.PTP --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 14 ++++++++------ .../frontend/maxwell/translate/impl/texture_gather.cpp | 16 ++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 1f1689c43..b2da079f9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -101,16 +101,18 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { switch (type) { case TextureType::_1D: case TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); case TextureType::_2D: case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, 
v.ir.Imm32(4), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); case TextureType::_3D: case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); case TextureType::CUBE: case TextureType::ARRAY_CUBE: throw NotImplementedException("Illegal offset on CUBE sample"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index 8c6384040..cdf5cb5c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const auto getVector = ([&v](const IR::U32& value) { + const IR::U32 bitsize = v.ir.Imm32(6); + const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), 
true)); + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); }); - return {getVector(value1), getVector(value2)}; + return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), + getVector(value2, 0), getVector(value2, 16)); } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -155,7 +155,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy break; } case OffsetType::PTP: { - std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + offset2 = MakeOffsetPTP(v, meta_reg); break; } default: -- cgit v1.2.3 From d9c5bd9509e82fcde72c18663989931f97ed6518 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 16:46:07 -0300 Subject: shader: Refactor PTP and other minor changes --- .../maxwell/translate/impl/texture_gather.cpp | 28 ++++++++++------------ 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index cdf5cb5c4..b2f9cda46 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const IR::U32 bitsize = v.ir.Imm32(6); - const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { - return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), - 
v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); - }); - return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), - getVector(value2, 0), getVector(value2, 16)); + const IR::U32 bitsize{v.ir.Imm32(6)}; + const auto make_vector{[&v, &bitsize](const IR::U32& value) { + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); + }}; + return {make_vector(value1), make_vector(value2)}; } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -150,14 +150,12 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy switch (offset_type) { case OffsetType::None: break; - case OffsetType::AOFFI: { + case OffsetType::AOFFI: offset = MakeOffset(v, meta_reg, tld4.type); break; - } - case OffsetType::PTP: { - offset2 = MakeOffsetPTP(v, meta_reg); + case OffsetType::PTP: + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); break; - } default: throw NotImplementedException("Invalid offset type {}", offset_type); } @@ -167,7 +165,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy IR::TextureInstInfo info{}; info.type.Assign(GetType(tld4.type, tld4.dc != 0)); info.gather_component.Assign(static_cast(component_type)); - const IR::Value sample{[&]() -> IR::Value { + const IR::Value sample{[&] { if (tld4.dc == 0) { return v.ir.ImageGather(handle, coords, offset, offset2, info); } -- cgit v1.2.3 From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: shader: Implement TXQ and fix FragDepth --- .../maxwell/translate/impl/not_implemented.cpp | 8 --- .../maxwell/translate/impl/texture_query.cpp | 76 ++++++++++++++++++++++ 2 files changed, 76 
insertions(+), 8 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 788765c21..96ee2e741 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -373,14 +373,6 @@ void TranslatorVisitor::TXD_b(u64) { ThrowNotImplemented(Opcode::TXD_b); } -void TranslatorVisitor::TXQ(u64) { - ThrowNotImplemented(Opcode::TXQ); -} - -void TranslatorVisitor::TXQ_b(u64) { - ThrowNotImplemented(Opcode::TXQ_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..e8ea8faeb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Dimension = 1, + TextureType = 2, + SamplePos = 5, +}; + +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { + switch (mode) { + case Mode::Dimension: { + const IR::U32 lod{v.X(src_reg)}; + return v.ir.ImageQueryDimension(handle, lod); + } + case Mode::TextureType: + case Mode::SamplePos: + default: + throw NotImplementedException("Mode {}", mode); + } +} + +void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<22, 3, Mode> mode; + BitField<31, 4, u64> mask; + } const txq{insn}; + + IR::Reg src_reg{txq.src_reg}; + IR::U32 handle; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(src_reg); + ++src_reg; + } + const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + IR::Reg dest_reg{txq.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((txq.mask >> element) & 1) == 0) { + continue; + } + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXQ(u64 insn) { + union { + u64 raw; + BitField<36, 13, u64> cbuf_offset; + } const txq{insn}; + + Impl(*this, insn, static_cast(txq.cbuf_offset)); +} + +void TranslatorVisitor::TXQ_b(u64 insn) { + Impl(*this, insn, std::nullopt); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From cdf0cc38698bf31773edd0016d5171bd11b966d0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 04:19:38 +0100 Subject: shader: Fix TXQ --- src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index e8ea8faeb..8c7e04bca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -66,7 +66,7 @@ void TranslatorVisitor::TXQ(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const txq{insn}; - Impl(*this, insn, static_cast(txq.cbuf_offset)); + Impl(*this, insn, static_cast(txq.cbuf_offset * 4)); } void TranslatorVisitor::TXQ_b(u64 insn) { -- cgit v1.2.3 From a806b29cb9bb48c4a9628700946231c9150463b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:54:32 -0300 Subject: shader: Fix structured control flow on KIL instructions This could potentially leave unvisited blocks, leading to illegal phi nodes. --- .../frontend/maxwell/structured_control_flow.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 79e344986..9d4688390 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -152,7 +152,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { switch (stmt->type) { case StatementType::Code: - ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, + stmt->code->LocationBegin(), stmt->code->LocationEnd(), + reinterpret_cast(stmt->code)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, 
DumpExpr(stmt->cond), @@ -749,8 +751,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); - current_block = nullptr; + IR::Block* demote_block{MergeBlock(parent, stmt)}; + IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + current_block = demote_block; break; } default: -- cgit v1.2.3 From 675a82416d7775dc7a252a5d8f5b704e6b8f2326 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 03:08:31 -0300 Subject: spirv: Remove dependencies on Environment when generating SPIR-V --- src/shader_recompiler/frontend/maxwell/program.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0074eb89b..6efaf6ee0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -33,6 +33,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: shader: Better interpolation and disabled attributes support --- src/shader_recompiler/frontend/maxwell/program.cpp | 35 ++++++++++++++++++++++ .../translate/impl/load_store_attribute.cpp | 10 +------ 2 files changed, 36 insertions(+), 9 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 6efaf6ee0..a914a91f4 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -27,6 +27,40 @@ static void RemoveUnreachableBlocks(IR::Program& program) { }); } +static void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& 
sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; @@ -51,6 +85,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sat, 27 Mar 2021 05:26:29 -0300 Subject: shader: Add missing I2I exception when CC is used --- .../frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index ca28c6dd9..e8f35552c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -52,10 +52,14 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<13, 1, u64> src_fmt_sign; BitField<41, 3, u64> selector; BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; BitField<49, 1, u64> abs; BitField<50, 1, u64> sat; } const i2i{insn}; + if (i2i.cc != 0) { + throw 
NotImplementedException("I2I CC"); + } if (i2i.sat != 0) { throw NotImplementedException("I2I SAT"); } -- cgit v1.2.3 From 51475e21ba5e9a17730a2b5a868dc73d53db9bc1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Mar 2021 19:47:00 -0400 Subject: shader: Implement VMAD, VMNMX, VSETP --- .../maxwell/translate/impl/not_implemented.cpp | 13 --- .../maxwell/translate/impl/video_helper.cpp | 30 +++++++ .../frontend/maxwell/translate/impl/video_helper.h | 23 ++++++ .../translate/impl/video_minimum_maximum.cpp | 92 ++++++++++++++++++++++ .../maxwell/translate/impl/video_multiply_add.cpp | 64 +++++++++++++++ .../maxwell/translate/impl/video_set_predicate.cpp | 92 ++++++++++++++++++++++ 6 files changed, 301 insertions(+), 13 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 96ee2e741..409216640 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -385,14 +385,6 @@ void TranslatorVisitor::VADD(u64) { ThrowNotImplemented(Opcode::VADD); } -void TranslatorVisitor::VMAD(u64) { - ThrowNotImplemented(Opcode::VMAD); -} - -void TranslatorVisitor::VMNMX(u64) { - ThrowNotImplemented(Opcode::VMNMX); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } 
@@ -400,11 +392,6 @@ void TranslatorVisitor::VOTE_vtg(u64) { void TranslatorVisitor::VSET(u64) { ThrowNotImplemented(Opcode::VSET); } - -void TranslatorVisitor::VSETP(u64) { - ThrowNotImplemented(Opcode::VSETP); -} - void TranslatorVisitor::VSHL(u64) { ThrowNotImplemented(Opcode::VSHL); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { + +IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, + u32 selector, bool is_signed) { + switch (width) { + case VideoWidth::Byte: + case VideoWidth::Unknown: + return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); + case VideoWidth::Short: + return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); + case VideoWidth::Word: + return value; + default: + throw NotImplementedException("Unknown VideoWidth {}", width); + } +} + +VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { + // immediates must be 16-bit format. + return is_immediate ? 
VideoWidth::Short : width; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +enum class VideoWidth : u64 { + Byte, + Unknown, + Short, + Word, +}; + +[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, + VideoWidth width, u32 selector, bool is_signed); + +[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VideoMinMaxOps : u64 {
+    MRG_16H,
+    MRG_16L,
+    MRG_8B0,
+    MRG_8B2,
+    ACC,
+    MIN,
+    MAX,
+};
+
+[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
+                                         VideoMinMaxOps op, bool is_signed) {
+    switch (op) {
+    case VideoMinMaxOps::MIN:
+        return ir.IMin(lhs, rhs, is_signed);
+    case VideoMinMaxOps::MAX:
+        return ir.IMax(lhs, rhs, is_signed);
+    default: // MRG_* and ACC are declared above but not handled yet
+        throw NotImplementedException("VMNMX op {}", op);
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VMNMX(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width; // NOTE(review): shares bit 29 with the selector
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width; // NOTE(review): shares bit 37 with the selector
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+        BitField<51, 3, VideoMinMaxOps> op;
+        BitField<54, 1, u64> dest_sign;
+        BitField<55, 1, u64> sat;
+        BitField<56, 1, u64> mx;
+    } const vmnmx{insn};
+
+    if (vmnmx.cc != 0) {
+        throw NotImplementedException("VMNMX CC");
+    }
+    if (vmnmx.sat != 0) {
+        throw NotImplementedException("VMNMX SAT");
+    }
+    // Selectors were shown to default to 2 in unit tests
+    if (vmnmx.src_a_selector != 2) {
+        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
+    }
+    if (vmnmx.src_b_selector != 2) {
+        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
+    }
+    if (vmnmx.src_a_width != VideoWidth::Word) {
+        throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
+    }
+
+    const bool is_b_imm{vmnmx.is_src_b_reg == 0}; // bit 50 clear: operand b is the immediate
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+    const IR::U32 src_c{GetReg39(insn)}; // third operand, consumed by ApplyVideoMinMaxOp
+
+    const VideoWidth a_width{vmnmx.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vmnmx.src_a_sign != 0};
+    const bool src_b_signed{vmnmx.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+    // First operation's sign is only dependent on operand b's sign
+    const bool op_1_signed{src_b_signed};
+
+    const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+                                    : ir.IMin(op_a, op_b, op_1_signed)};
+    X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::VMAD(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width; // NOTE(review): shares bit 29 with the selector
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width; // NOTE(review): shares bit 37 with the selector
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+        BitField<51, 2, u64> scale;
+        BitField<53, 1, u64> src_c_neg;
+        BitField<54, 1, u64> src_a_neg;
+        BitField<55, 1, u64> sat;
+    } const vmad{insn};
+
+    if (vmad.cc != 0) {
+        throw NotImplementedException("VMAD CC");
+    }
+    if (vmad.sat != 0) {
+        throw NotImplementedException("VMAD SAT");
+    }
+    if (vmad.scale != 0) {
+        throw NotImplementedException("VMAD SCALE");
+    }
+    if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { // both negate bits set is reported as PO
+        throw NotImplementedException("VMAD PO");
+    }
+    if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+        throw NotImplementedException("VMAD NEG");
+    }
+    const bool is_b_imm{vmad.is_src_b_reg == 0}; // bit 50 clear: operand b is the immediate
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+    const IR::U32 src_c{GetReg39(insn)}; // addend
+
+    const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+    // Immediate values can't have a selector
+    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+    const VideoWidth a_width{vmad.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vmad.src_a_sign != 0};
+    const bool src_b_signed{vmad.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+    X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); // dest = a * b + c
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..af13b3fcc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VsetpCompareOp : u64 {
+    False = 0,
+    LessThan,
+    Equal,
+    LessThanEqual,
+    GreaterThan = 16, // 4-15 have no enumerator and are rejected by VsetpToShaderCompareOp
+    NotEqual,
+    GreaterThanEqual,
+    True,
+};
+
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+    switch (op) {
+    case VsetpCompareOp::False:
+        return CompareOp::False;
+    case VsetpCompareOp::LessThan:
+        return CompareOp::LessThan;
+    case VsetpCompareOp::Equal:
+        return CompareOp::Equal;
+    case VsetpCompareOp::LessThanEqual:
+        return CompareOp::LessThanEqual;
+    case VsetpCompareOp::GreaterThan:
+        return CompareOp::GreaterThan;
+    case VsetpCompareOp::NotEqual:
+        return CompareOp::NotEqual;
+    case VsetpCompareOp::GreaterThanEqual:
+        return CompareOp::GreaterThanEqual;
+    case VsetpCompareOp::True:
+        return CompareOp::True;
+    default:
+        throw NotImplementedException("Invalid compare op {}", op);
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VSETP(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 3, IR::Pred> dest_pred_b;
+        BitField<3, 3, IR::Pred> dest_pred_a;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width;
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width;
+        BitField<39, 3, IR::Pred> bop_pred;
+        BitField<42, 1, u64> neg_bop_pred;
+        BitField<43, 5, VsetpCompareOp> compare_op;
+        BitField<45, 2, BooleanOp> bop; // NOTE(review): overlaps compare_op bits 45-46; confirm
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+    } const vsetp{insn};
+
+    const bool is_b_imm{vsetp.is_src_b_reg == 0}; // bit 50 clear: operand b is the immediate
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
+    const VideoWidth a_width{vsetp.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vsetp.src_a_sign != 0};
+    const bool src_b_signed{vsetp.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+    // Compare operation's sign is only dependent on operand b's sign
+    const bool compare_signed{src_b_signed};
+    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
+    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+    ir.SetPred(vsetp.dest_pred_a, result_a);
+    ir.SetPred(vsetp.dest_pred_b, result_b); // combined from the negated comparison
+}
+
+} // namespace Shader::Maxwell
-- 
cgit v1.2.3

From 84298ce1917da637e7f60ee6c95602a8e7512c8a Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 28 Mar 2021 14:08:17 -0400
Subject: shader: Implement ISCADD CC

---
 .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/shader_recompiler/frontend/maxwell')

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
index 42fd42bb1..7aef37f54 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
+++
b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -43,7 +43,10 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { v.X(iscadd.dest_reg, result); if (iscadd.cc != 0) { - throw NotImplementedException("ISCADD CC"); + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); } } -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../translate/impl/load_store_local_shared.cpp | 197 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 -- 3 files changed, 199 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset)); + } else { + const s32 relative{static_cast(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + const Size nnn = encoding.size; + switch (encoding.size) { + case Size::U8: + return {8, false}; + 
case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, ir.LoadLocal(word_offset)); + for (int i = 1; i < bit_size / 32; ++i) { + X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + 
X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, bit_size / 32)) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), 
X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDL(u64) { - ThrowNotImplemented(Opcode::LDL); -} - -void TranslatorVisitor::LDS(u64) { - ThrowNotImplemented(Opcode::LDS); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { ThrowNotImplemented(Opcode::ST); } -void TranslatorVisitor::STL(u64) { - ThrowNotImplemented(Opcode::STL); -} - void TranslatorVisitor::STP(u64) { ThrowNotImplemented(Opcode::STP); } -void TranslatorVisitor::STS(u64) { - ThrowNotImplemented(Opcode::STS); -} - void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -- cgit v1.2.3 From cd9f75e2239666a932861f6d54138febf8736a8c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 20:16:26 -0400 Subject: shader: Fix ISCADD logic for PO/CC --- .../maxwell/translate/impl/integer_scaled_add.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 7aef37f54..93cc2c0b1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -30,23 +30,24 @@ void 
ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { if (iscadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } + } else { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); } // With the operands already processed, scale A const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; - IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; - if (po) { - // .PO adds one to the final result - result = v.ir.IAdd(result, v.ir.Imm32(1)); - } + const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; v.X(iscadd.dest_reg, result); if (iscadd.cc != 0) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); - v.SetCFlag(v.ir.GetCarryFromOp(result)); - v.SetOFlag(v.ir.GetOverflowFromOp(result)); + const IR::U1 carry{v.ir.GetCarryFromOp(result)}; + const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; + v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); + v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); } } -- cgit v1.2.3 From dbc1e5cde79b9165605741e1ea7158513ef6499f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Mar 2021 23:01:28 -0400 Subject: shader: Implement I2I SAT --- .../impl/integer_to_integer_conversion.cpp | 40 ++++++++++++++++------ 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index e8f35552c..98b7f59f7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -30,16 +30,33 @@ enum class IntegerWidth : u64 { [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth 
dst_width) { const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 count{WidthSize(ir, dst_width)}; + return ir.BitFieldExtract(src, zero, count, false); +} + +[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, + bool dst_signed, bool src_signed) { + IR::U32 min{}; + IR::U32 max{}; + const IR::U32 zero{ir.Imm32(0)}; switch (dst_width) { case IntegerWidth::Byte: - return ir.BitFieldExtract(src, zero, ir.Imm32(8), false); + min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; + max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); + break; case IntegerWidth::Short: - return ir.BitFieldExtract(src, zero, ir.Imm32(16), false); + min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; + max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); + break; case IntegerWidth::Word: - return ir.BitFieldExtract(src, zero, ir.Imm32(32), false); + min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; + max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); + break; default: throw NotImplementedException("Invalid width {}", dst_width); } + const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; + return dst_signed && src_signed ? 
ir.SClamp(value, min, max) : ir.UClamp(value, min, max); } void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { @@ -60,9 +77,6 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (i2i.cc != 0) { throw NotImplementedException("I2I CC"); } - if (i2i.sat != 0) { - throw NotImplementedException("I2I SAT"); - } if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { throw NotImplementedException("16-bit source format incompatible with selector {}", i2i.selector); @@ -75,15 +89,21 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { const s32 selector{static_cast(i2i.selector)}; const IR::U32 offset{v.ir.Imm32(selector * 8)}; const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; - IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, i2i.src_fmt_sign != 0)}; - if (i2i.abs) { + const bool src_signed{i2i.src_fmt_sign != 0}; + const bool dst_signed{i2i.dst_fmt_sign != 0}; + const bool sat{i2i.sat != 0}; + + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; + if (i2i.abs != 0) { src_values = v.ir.IAbs(src_values); } - if (i2i.neg) { + if (i2i.neg != 0) { src_values = v.ir.INeg(src_values); } + const IR::U32 result{ + sat ? 
SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) + : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; - const IR::U32 result{ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; v.X(i2i.dest_reg, result); } } // Anonymous namespace -- cgit v1.2.3 From 73af0d2e0d12d94b1d2dc8c0b448d0769cf111f4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 21:33:52 -0400 Subject: shader: Implement I2I CC --- .../maxwell/translate/impl/integer_to_integer_conversion.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index 98b7f59f7..2f1a58805 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -74,9 +74,6 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<50, 1, u64> sat; } const i2i{insn}; - if (i2i.cc != 0) { - throw NotImplementedException("I2I CC"); - } if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { throw NotImplementedException("16-bit source format incompatible with selector {}", i2i.selector); @@ -105,6 +102,10 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; v.X(i2i.dest_reg, result); + if (i2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } } } // Anonymous namespace -- cgit v1.2.3 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- .../frontend/maxwell/control_flow.cpp | 58 +++++++++-- 
.../frontend/maxwell/control_flow.h | 7 +- .../maxwell/indirect_branch_table_track.cpp | 108 +++++++++++++++++++++ .../frontend/maxwell/indirect_branch_table_track.h | 28 ++++++ .../frontend/maxwell/instruction.h | 1 + .../frontend/maxwell/structured_control_flow.cpp | 57 +++++++++++ .../maxwell/translate/impl/branch_indirect.cpp | 36 +++++++ .../maxwell/translate/impl/load_constant.cpp | 29 +----- .../maxwell/translate/impl/load_constant.h | 39 ++++++++ .../maxwell/translate/impl/not_implemented.cpp | 8 -- 10 files changed, 327 insertions(+), 44 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 4f6707fae..1e9b8e426 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -14,6 +14,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { @@ -252,9 +253,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Opcode opcode{Decode(inst.raw)}; switch (opcode) { case Opcode::BRA: - case Opcode::BRX: case Opcode::JMP: - case Opcode::JMX: case Opcode::RET: if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; @@ -264,10 +263,6 @@ CFG::AnalysisState 
CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::JMP: AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); break; - case Opcode::BRX: - case Opcode::JMX: - AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); - break; case Opcode::RET: block->end_class = EndClass::Return; break; @@ -302,6 +297,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::SSY: block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; + case Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); case Opcode::PRET: @@ -407,8 +405,46 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { - throw NotImplementedException("{}", is_absolute ? 
"JMX" : "BRX"); +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, block->begin); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += brx_table->branch_offset; + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back(branch); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; } CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, @@ -449,7 +485,6 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function // Block already exists and it has been visited return &*it; } - // TODO: FIX DANGLING BLOCKS Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, @@ -494,6 +529,11 @@ std::string CFG::Dot() const { 
add_branch(block.branch_false, false); } break; + case EndClass::IndirectBranch: + for (Block* const branch : block.indirect_branches) { + add_branch(branch, false); + } + break; case EndClass::Call: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 22f134194..1e05fcb97 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -26,6 +26,7 @@ using FunctionId = size_t; enum class EndClass { Branch, + IndirectBranch, Call, Exit, Return, @@ -76,11 +77,14 @@ struct Block : boost::intrusive::set_base_hook< union { Block* branch_true; FunctionId function_call; + IR::Reg branch_reg; }; union { Block* branch_false; Block* return_block; + s32 branch_offset; }; + std::vector indirect_branches; }; struct Label { @@ -139,7 +143,8 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..96453509d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template +std::optional Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + 
const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast(ldc.index)}; + const u32 cbuf_offset{static_cast(static_cast(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index{cbuf_index}, + .cbuf_offset{cbuf_offset}, + .num_entries{imnmx_immediate + 1}, + .branch_offset{brx_offset}, + .branch_reg{brx_reg}, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h index 57fd531f2..743d68d61 100644 --- a/src/shader_recompiler/frontend/maxwell/instruction.h +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 9d4688390..a6e55f61e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -17,6 +17,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/object_pool.h" @@ -46,12 +47,15 @@ enum class StatementType { Break, Return, Kill, + Unreachable, Function, Identity, Not, Or, SetVariable, + SetIndirectBranchVariable, Variable, + IndirectBranchCond, }; bool 
HasChildren(StatementType type) { @@ -72,12 +76,15 @@ struct Loop {}; struct Break {}; struct Return {}; struct Kill {}; +struct Unreachable {}; struct FunctionTag {}; struct Identity {}; struct Not {}; struct Or {}; struct SetVariable {}; +struct SetIndirectBranchVariable {}; struct Variable {}; +struct IndirectBranchCond {}; #ifdef _MSC_VER #pragma warning(push) @@ -96,6 +103,7 @@ struct Statement : ListBaseHook { : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} Statement(Kill) : type{StatementType::Kill} {} + Statement(Unreachable) : type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} @@ -103,7 +111,12 @@ struct Statement : ListBaseHook { : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_) + : location{location_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -118,11 +131,14 @@ struct Statement : ListBaseHook { IR::Condition guest_cond; Statement* op; Statement* op_a; + u32 location; + s32 branch_offset; }; union { Statement* cond; Statement* op_b; u32 id; + IR::Reg branch_reg; }; Statement* up{}; StatementType type; @@ -141,6 +157,8 @@ std::string DumpExpr(const Statement* stmt) { return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); case StatementType::Variable: return fmt::format("goto_L{}", 
stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); default: return ""; } @@ -182,14 +200,22 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { case StatementType::Kill: ret += fmt::format("{} kill;\n", indent); break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; case StatementType::SetVariable: ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; case StatementType::Function: case StatementType::Identity: case StatementType::Not: case StatementType::Or: case StatementType::Variable: + case StatementType::IndirectBranchCond: throw LogicError("Statement can't be printed"); } } @@ -417,6 +443,17 @@ private: } break; } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset)); + for (Flow::Block* const branch : block.indirect_branches) { + const Node indirect_label{local_labels.at(branch)}; + Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{})); + break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; const Node call_return_label{local_labels.at(block.return_block)}; @@ -623,6 +660,8 @@ IR::Block* TryFindForwardBlock(const Statement& stmt) { return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); case StatementType::Variable: return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); default: throw 
NotImplementedException("Statement type {}", stmt.type); } @@ -670,6 +709,15 @@ private: ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } + case StatementType::SetIndirectBranchVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } case StatementType::If: { if (!current_block) { current_block = block_pool.Create(inst_pool); @@ -756,6 +804,15 @@ private: current_block = demote_block; break; } + case StatementType::Unreachable: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Unreachable(); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 39becf93c..49ccb7d62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -5,25 +5,11 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" namespace Shader::Maxwell { +using namespace LDC; namespace { -enum class Mode : u64 { - Default, - IL, - IS, - ISL, -}; - -enum class Size : u64 { - U8, - S8, - U16, - S16, - B32, - B64, -}; - std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, const IR::U32& reg, const IR::U32& imm) { switch (mode) { @@ -37,16 +23,7 @@ std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& im } // Anonymous namespace void TranslatorVisitor::LDC(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_reg; - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - BitField<44, 2, Mode> mode; - BitField<48, 3, Size> size; - } const ldc{insn}; - + const 
Encoding ldc{insn}; const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; const IR::U32 reg{X(ldc.src_reg)}; const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b62d8ee2a..a0057a473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -53,10 +53,6 @@ void TranslatorVisitor::BRK(u64) { ThrowNotImplemented(Opcode::BRK); } -void TranslatorVisitor::BRX(u64) { - ThrowNotImplemented(Opcode::BRX); -} - void TranslatorVisitor::CAL() { // CAL is a no-op } @@ -181,10 +177,6 @@ void TranslatorVisitor::JMP(u64) { ThrowNotImplemented(Opcode::JMP); } -void TranslatorVisitor::JMX(u64) { - ThrowNotImplemented(Opcode::JMX); -} - void TranslatorVisitor::KIL() { // KIL is a no-op } -- cgit v1.2.3 From 
6c51f496320f698e123207c09ca61e55180a31b5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 22:23:45 -0400 Subject: shader: Implement FSWZADD --- .../translate/impl/floating_point_swizzled_add.cpp | 44 ++++++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 4 ++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 -- 4 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..e42921a21 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{CastFpRounding(fswzadd.round)}, + .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 9bae89c10..30b570ce4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast(GetReg8(insn)); +} + IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 54c31deb4..bf7d1bae8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h 
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -353,6 +353,7 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0057a473..6a580f831 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSWZADD(u64) { - ThrowNotImplemented(Opcode::FSWZADD); -} - void TranslatorVisitor::GETCRSPTR(u64) { ThrowNotImplemented(Opcode::GETCRSPTR); } -- cgit v1.2.3 From b0d5572abfe1f14e02d8219f0a4d7dd09ff36fd1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 22:13:37 -0300 Subject: shader: Fix indirect branches to scheduler instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 ++++++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 9 ++++++++- .../frontend/maxwell/structured_control_flow.cpp | 6 +++--- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1e9b8e426..784f9df8a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -434,7 +434,10 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, block->indirect_branches.reserve(targets.size()); for (const u32 
target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; - block->indirect_branches.push_back(branch); + block->indirect_branches.push_back({ + .block{branch}, + .address{target}, + }); } block->cond = IR::Condition{true}; block->end = pc + 1; @@ -530,8 +533,8 @@ std::string CFG::Dot() const { } break; case EndClass::IndirectBranch: - for (Block* const branch : block.indirect_branches) { - add_branch(branch, false); + for (const IndirectBranch& branch : block.indirect_branches) { + add_branch(branch.block, false); } break; case EndClass::Call: diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 1e05fcb97..a8c90d27a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -22,6 +22,8 @@ namespace Shader::Maxwell::Flow { +struct Block; + using FunctionId = size_t; enum class EndClass { @@ -60,6 +62,11 @@ private: boost::container::small_vector entries; }; +struct IndirectBranch { + Block* block; + u32 address; +}; + struct Block : boost::intrusive::set_base_hook< // Normal link is ~2.5% faster compared to safe link boost::intrusive::link_mode> { @@ -84,7 +91,7 @@ struct Block : boost::intrusive::set_base_hook< Block* return_block; s32 branch_offset; }; - std::vector indirect_branches; + std::vector indirect_branches; }; struct Label { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index a6e55f61e..c804c2a8e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -446,9 +446,9 @@ private: case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, block.branch_offset)); - for (Flow::Block* const branch : block.indirect_branches) { - 
const Node indirect_label{local_labels.at(branch)}; - Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + for (const Flow::IndirectBranch& indirect : block.indirect_branches) { + const Node indirect_label{local_labels.at(indirect.block)}; + Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } -- cgit v1.2.3 From dc1a9a3bed2aa9b0851f07976b0c687172aa3edc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 20:51:05 +0100 Subject: shader: Implement TLD --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_load.cpp | 165 +++++++++++++++++++++ 3 files changed, 167 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index d668dc1aa..b47fb9c2e 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -252,8 +252,8 @@ INST(SYNC, "SYNC", "1111 0000 1111 1---") INST(TEX, "TEX", "1100 0--- ---- ----") INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") -INST(TLD, "TLD", "1101 1100 --11 1---") -INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD, "TLD", "1101 1100 ---- ----") +INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") INST(TLD4, "TLD4", "1100 10-- ---- ----") INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6a580f831..60d61ec6e 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLD(u64) { - ThrowNotImplemented(Opcode::TLD); -} - -void TranslatorVisitor::TLD_b(u64) { - ThrowNotImplemented(Opcode::TLD_b); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..b4063fa6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? 
Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case 
TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implmented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type, false)); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); + }()}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + 
++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 01:45:20 +0100 Subject: shader: Implement TLDS --- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/texture_load_swizzled.cpp | 252 +++++++++++++++++++++ 2 files changed, 252 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 60d61ec6e..7e1ad63e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,10 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLDS(u64) { - ThrowNotImplemented(Opcode::TLDS); -} - void TranslatorVisitor::TMML(u64) { ThrowNotImplemented(Opcode::TMML); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..3e6ebd911 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -0,0 +1,252 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<54, 1, u64> aoffi; + BitField<53, 1, u64> lod; + BitField<55, 1, u64> ms; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; + BitField<53, 4, u64> encoding; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tlds{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tlds.cbuf_offset * 4))}; + const IR::Reg reg_a{tlds.src_reg_a}; + const IR::Reg reg_b{tlds.src_reg_b}; + IR::Value coords; + IR::U32 lod; + IR::Value offsets; + IR::U32 multisample; + Shader::TextureType texture_type; + switch (tlds.encoding) { + case 0: { + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + break; + } + case 1: { 
+ texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + lod = v.X(reg_b); + break; + } + case 2: { + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); + break; + } + case 4: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + offsets = MakeOffset(v, reg_b); + break; + } + case 5: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + break; + } + case 6: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + multisample = v.X(reg_b); + break; + } + case 7: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color3D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); + break; + } + case 8: { + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::ColorArray2D; + IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); + coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); + break; + } + case 12: { + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + offsets = MakeOffset(v, reg_b + 1); + break; + } + default: { + throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); + break; + } + } + IR::TextureInstInfo info{}; + if (tlds.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.type.Assign(texture_type); + return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); +} + +unsigned Swizzle(u64 insn) { + const Encoding tlds{insn}; + const size_t encoding{tlds.swizzle}; + if (tlds.dest_reg_b == IR::Reg::RZ) { + if (encoding >= 
RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + return IR::F32{v.ir.CompositeExtract(sample, component)}; +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding tlds{insn}; + switch (index) { + case 0: + return tlds.dest_reg_a; + case 1: + CheckAlignment(tlds.dest_reg_a, 2); + return tlds.dest_reg_a + 1; + case 2: + return tlds.dest_reg_b; + case 3: + CheckAlignment(tlds.dest_reg_b, 2); + return tlds.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding tlds{insn}; + switch (store_index) { + case 1: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch 
(store_index) { + case 2: + break; + case 3: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLDS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} +} // namespace Shader::Maxwell -- cgit v1.2.3 From be3e94ae55184933e0f1f5fb55698513f7936382 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Mar 2021 21:25:08 +0200 Subject: shader: Implement TMML partially --- .../maxwell/translate/impl/not_implemented.cpp | 8 -- .../translate/impl/texture_mipmap_level.cpp | 130 +++++++++++++++++++++ 2 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 7e1ad63e1..9f5ea7775 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TMML(u64) { - ThrowNotImplemented(Opcode::TMML); -} - -void TranslatorVisitor::TMML_b(u64) { - ThrowNotImplemented(Opcode::TMML_b); -} - void TranslatorVisitor::TXA(u64) { ThrowNotImplemented(Opcode::TXA); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..ee13ede30 --- /dev/null +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -0,0 +1,130 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? 
Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> ndv; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tmml{insn}; + + if ((tmml.mask & 0xC) != 0) { + throw NotImplementedException("TMML BA results are not implmented"); + } + + IR::F32 transform_constant = v.ir.Imm32(256.0f); + + const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; + + IR::U32 handle; + IR::Reg meta_reg{tmml.meta_reg}; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + IR::TextureInstInfo info{}; + 
info.type.Assign(GetType(tmml.type, false)); + const IR::Value sample{ + [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RP}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + IR::Reg dest_reg{tmml.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tmml.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; + v.F(dest_reg, + element < 2 ? IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TMML(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TMML_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 630273b6295f524401abf1c131dba09fdd055911 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 02:52:52 +0200 Subject: shader: Implement TXD --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_gradient.cpp | 180 +++++++++++++++++++++ 3 files changed, 182 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index b47fb9c2e..c759bd4d4 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -261,8 +261,8 @@ INST(TLDS, "TLDS", "1101 -01- ---- ----") INST(TMML, "TMML", "1101 1111 0101 1---") INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") INST(TXA, "TXA", "1101 1111 0100 0---") -INST(TXD, "TXD", "1101 1110 0011 10--") -INST(TXD_b, "TXD (b)", "1101 1110 0111 10--") +INST(TXD, "TXD", "1101 1110 00-- ----") +INST(TXD_b, "TXD 
(b)", "1101 1110 01-- ----") INST(TXQ, "TXQ", "1101 1111 0100 1---") INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9f5ea7775..ba526817a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -317,14 +317,6 @@ void TranslatorVisitor::TXA(u64) { ThrowNotImplemented(Opcode::TXA); } -void TranslatorVisitor::TXD(u64) { - ThrowNotImplemented(Opcode::TXD); -} - -void TranslatorVisitor::TXD_b(u64) { - ThrowNotImplemented(Opcode::TXD_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..00768e167 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -0,0 +1,180 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? 
Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { + const IR::U32 value{v.X(reg)}; + const u32 base = has_lod_clamp ? 12 : 16; + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> aoffi; + BitField<50, 1, u64> lc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> derivate_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const txd{insn}; + + const bool has_lod_clamp = txd.lc != 0; + if (has_lod_clamp) { + throw NotImplementedException("TXD.LC - CLAMP is not implemented"); + } + + IR::Value coords; + u32 num_derivates; + IR::Reg base_reg = txd.coord_reg; + IR::Reg last_reg; + IR::Value handle; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); + } else { + handle = v.X(base_reg++); + } + + const auto read_array{[&]() -> 
IR::F32 { + return v.ir.ConvertUToF(32, 16, + v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), + v.ir.Imm32(has_lod_clamp ? 12 : 16))); + }}; + switch (txd.type) { + case TextureType::_1D: { + coords = v.F(base_reg); + num_derivates = 1; + last_reg = base_reg + 1; + break; + } + case TextureType::ARRAY_1D: { + last_reg = base_reg + 1; + coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); + num_derivates = 1; + break; + } + case TextureType::_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); + num_derivates = 2; + break; + } + case TextureType::ARRAY_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); + num_derivates = 2; + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + const IR::Reg derivate_reg{txd.derivate_reg}; + IR::Value derivates; + switch (num_derivates) { + case 1: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); + break; + } + case 2: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), + v.F(derivate_reg + 2), v.F(derivate_reg + 3)); + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + IR::Value offset; + if (txd.aoffi != 0) { + offset = MakeOffset(v, last_reg, has_lod_clamp); + } + + IR::F32 lod_clamp; + if (has_lod_clamp) { + const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast(0x3b800000U)); + const IR::F32 tmp = v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); + lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + } + + IR::TextureInstInfo info{}; + info.type.Assign(GetType(txd.type, false)); + info.num_derivates.Assign(num_derivates); + info.has_lod_clamp.Assign(has_lod_clamp ? 
1 : 0); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); + }()}; + + IR::Reg dest_reg{txd.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((txd.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (txd.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TXD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From cb6fc03e55e9eff0826173a6bcacef3034322f7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 01:08:25 -0300 Subject: shader: Always pass a lod for TexelFetch --- .../maxwell/translate/impl/texture_load.cpp | 2 ++ .../translate/impl/texture_load_swizzled.cpp | 34 ++++++++-------------- 2 files changed, 14 insertions(+), 22 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index b4063fa6e..df38f87a3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -124,6 +124,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } if (tld.lod != 0) { lod = v.X(meta_reg++); + } else { + lod = v.ir.Imm32(0U); } if (tld.aoffi != 0) { offset = MakeOffset(v, meta_reg, tld.type); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 3e6ebd911..623b8fc23 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -74,62 +74,55 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { const IR::Reg reg_a{tlds.src_reg_a}; const IR::Reg reg_b{tlds.src_reg_b}; IR::Value coords; - IR::U32 lod; + IR::U32 lod{v.ir.Imm32(0U)}; IR::Value offsets; IR::U32 multisample; - Shader::TextureType texture_type; + Shader::TextureType texture_type{}; switch (tlds.encoding) { - case 0: { + case 0: texture_type = Shader::TextureType::Color1D; coords = v.X(reg_a); break; - } - case 1: { + case 1: texture_type = Shader::TextureType::Color1D; coords = v.X(reg_a); lod = v.X(reg_b); break; - } - case 2: { + case 2: texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); break; - } - case 4: { + case 4: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); offsets = MakeOffset(v, reg_b); break; - } - case 5: { + case 5: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); lod = v.X(reg_b); break; - } - case 6: { + case 6: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); multisample = v.X(reg_b); break; - } - case 7: { + case 7: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color3D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); break; - } case 8: { CheckAlignment(reg_b, 2); + const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; texture_type = Shader::TextureType::ColorArray2D; - IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); break; } - case 12: { + case 12: 
CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); texture_type = Shader::TextureType::Color2D; @@ -137,11 +130,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { lod = v.X(reg_b); offsets = MakeOffset(v, reg_b + 1); break; - } - default: { + default: throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); - break; - } } IR::TextureInstInfo info{}; if (tlds.precision == Precision::F16) { -- cgit v1.2.3 From 4d0d29fc2092bf02e102b8bac9cfa1b509274901 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 08:41:21 +0200 Subject: shader: Address feedback --- .../maxwell/translate/impl/texture_gradient.cpp | 34 ++++++++++++---------- .../maxwell/translate/impl/texture_load.cpp | 10 +++---- .../translate/impl/texture_mipmap_level.cpp | 26 +++++++---------- 3 files changed, 33 insertions(+), 37 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index 00768e167..c66468a48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -47,7 +47,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { const IR::U32 value{v.X(reg)}; - const u32 base = has_lod_clamp ? 12 : 16; + const u32 base{has_lod_clamp ? 
12U : 16U}; return v.ir.CompositeConstruct( v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); @@ -74,20 +74,21 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::Value coords; - u32 num_derivates; - IR::Reg base_reg = txd.coord_reg; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; IR::Reg last_reg; IR::Value handle; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); } const auto read_array{[&]() -> IR::F32 { - return v.ir.ConvertUToF(32, 16, - v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), - v.ir.Imm32(has_lod_clamp ? 12 : 16))); + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); }}; switch (txd.type) { case TextureType::_1D: { @@ -141,19 +142,20 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::F32 lod_clamp; if (has_lod_clamp) { - const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast(0x3b800000U)); - const IR::F32 tmp = v.ir.ConvertUToF( - 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); - lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. + // to convert a fixed point, float(value) / float(1 << fixed_point) + // in this case the fixed_point is 8. 
+ const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); } IR::TextureInstInfo info{}; info.type.Assign(GetType(txd.type, false)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); - }()}; + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; IR::Reg dest_reg{txd.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index df38f87a3..987b7ec34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -117,10 +117,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::Value offset; IR::U32 lod; IR::U32 multisample; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); } if (tld.lod != 0) { lod = v.X(meta_reg++); @@ -138,9 +138,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; info.type.Assign(GetType(tld.type, false)); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); - }()}; + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index ee13ede30..b6efc04f0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -81,39 +81,35 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { BitField<36, 13, u64> cbuf_offset; } const tmml{insn}; - if ((tmml.mask & 0xC) != 0) { + if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implmented"); } - IR::F32 transform_constant = v.ir.Imm32(256.0f); + IR::F32 transform_constant{v.ir.Imm32(256.0f)}; const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; IR::Reg meta_reg{tmml.meta_reg}; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; info.type.Assign(GetType(tmml.type, false)); - const IR::Value sample{ - [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; - const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RP}, - .fmz_mode{IR::FmzMode::FTZ}, - }; IR::Reg dest_reg{tmml.dest_reg}; for (size_t element = 0; element < 4; ++element) { if (((tmml.mask >> element) & 1) == 0) { continue; } - IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; - v.F(dest_reg, - element < 2 ? 
IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + value = v.ir.FPMul(value, transform_constant); + } + v.F(dest_reg, value); ++dest_reg; } } -- cgit v1.2.3 From eaafd53cfedf0c7ae40a3f790af5f0aec63ebd13 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 19:46:10 -0300 Subject: shader: Implement LDG .U.128 as .128 --- .../frontend/maxwell/translate/impl/load_store_memory.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 748b856c9..71688b1d7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -118,7 +118,8 @@ void TranslatorVisitor::LDG(u64 insn) { } break; } - case LoadSize::B128: { + case LoadSize::B128: + case LoadSize::U128: { if (!IR::IsAligned(dest_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } @@ -128,8 +129,6 @@ void TranslatorVisitor::LDG(u64 insn) { } break; } - case LoadSize::U128: - throw NotImplementedException("LDG U.128"); default: throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); } -- cgit v1.2.3 From b4a5e767d0a60d44c77460bd3a4062c5f69fb6c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Apr 2021 01:17:47 -0300 Subject: shader: Fix branches to visited virtual blocks --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 784f9df8a..ac8707847 100644 --- 
a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -486,6 +486,16 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { // Block already exists and it has been visited + if (function.blocks.begin() != it) { + // Check if the previous node is the virtual variant of the label + // This won't exist if a virtual node is not needed or it hasn't been visited + // If it hasn't been visited and a virtual node is needed, this will still behave as + // expected because the node impersonated with its virtual node. + const auto prev{std::prev(it)}; + if (it->begin.Virtual() == prev->begin) { + return &*prev; + } + } return &*it; } Block* const new_block{block_pool.Create(Block{ -- cgit v1.2.3 From 5ed8f2438498d3281c2ce8621869995de3908413 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 07:52:36 +0200 Subject: shader: Stub VOTE.VTG --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 4 ++++ .../frontend/maxwell/translate/impl/not_implemented.cpp | 4 ---- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 93cea302a..a295f4c5e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -95,6 +95,10 @@ enum class SpecialRegister : u64 { return ir.WorkgroupIdY(); case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); + case SpecialRegister::SR_WSCALEFACTOR_XY: + return ir.Imm32(Common::BitCast(1.0f)); + case 
SpecialRegister::SR_WSCALEFACTOR_Z: + return ir.Imm32(Common::BitCast(1.0f)); default: throw NotImplementedException("S2R special register {}", special_register); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba526817a..83ed0c0fd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -329,10 +329,6 @@ void TranslatorVisitor::VADD(u64) { ThrowNotImplemented(Opcode::VADD); } -void TranslatorVisitor::VOTE_vtg(u64) { - ThrowNotImplemented(Opcode::VOTE_vtg); -} - void TranslatorVisitor::VSET(u64) { ThrowNotImplemented(Opcode::VSET); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index a88894a7e..391520a18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -49,4 +49,8 @@ void TranslatorVisitor::VOTE(u64 insn) { Vote(*this, insn); } +void TranslatorVisitor::VOTE_vtg(u64) { + // Stub +} + } // namespace Shader::Maxwell -- cgit v1.2.3 From ecb30c907266921818d5b6b03e341028fa2ea082 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 22:20:57 +0200 Subject: shader: Improve VOTE.VTG stub --- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 391520a18..2acabb662 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -50,7 +50,10 @@ void TranslatorVisitor::VOTE(u64 insn) { } void 
TranslatorVisitor::VOTE_vtg(u64) { - // Stub + // LOG_WARNING("VOTE.VTG: Stubbed!"); + auto imm = ir.Imm1(false); + ir.SetFCSMFlag(imm); + ir.SetTRFlag(imm); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 655f7a570a10218ffb2ed175bb7f0b84530ccae0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 19:27:30 +0200 Subject: shader: Implement MEMBAR --- .../maxwell/translate/impl/barrier_operations.cpp | 56 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 11 ----- 2 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..933af572c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +// Seems to be in CUDA terminology. +enum class LocalScope : u64 { + CTG = 0, + GL = 1, + SYS = 2, + VC = 3, +}; + +IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { + switch (scope) { + case LocalScope::CTG: + return IR::MemoryScope::Warp; + case LocalScope::GL: + return IR::MemoryScope::Device; + case LocalScope::SYS: + return IR::MemoryScope::System; + case LocalScope::VC: + return IR::MemoryScope::Workgroup; // or should be device? 
+ default: + throw NotImplementedException("Unimplemented Local Scope {}", scope); + } +} + +} // namespace + +void TranslatorVisitor::MEMBAR(u64 inst) { + union { + u64 raw; + BitField<8, 2, LocalScope> scope; + } membar{inst}; + IR::BarrierInstInfo info{}; + info.scope.Assign(LocalScopeToMemoryScope(membar.scope)); + ir.MemoryBarrier(info); +} + +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op +} + +void TranslatorVisitor::BAR(u64) { + throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 83ed0c0fd..80a6ed578 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -37,10 +37,6 @@ void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } -void TranslatorVisitor::BAR(u64) { - ThrowNotImplemented(Opcode::BAR); -} - void TranslatorVisitor::BPT(u64) { ThrowNotImplemented(Opcode::BPT); } @@ -73,9 +69,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::DEPBAR() { - // DEPBAR is a no-op -} void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); @@ -189,10 +182,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::MEMBAR(u64) { - ThrowNotImplemented(Opcode::MEMBAR); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } -- cgit v1.2.3 From 45d547af11a18434ea17e4427db7286856a19537 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 23:05:47 +0200 Subject: shader: Implement SR_LaneId --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index a295f4c5e..731ac643f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -99,6 +99,8 @@ enum class SpecialRegister : u64 { return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: return ir.Imm32(Common::BitCast(1.0f)); + case SpecialRegister::SR_LANEID: + return ir.LaneId(); default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From baec84247fe815199595d9e8077b71f3b5c8317e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 01:48:39 +0200 Subject: shader: Address Feedback --- .../frontend/maxwell/translate/impl/barrier_operations.cpp | 12 ++++-------- .../maxwell/translate/impl/move_special_register.cpp | 2 ++ .../frontend/maxwell/translate/impl/vote.cpp | 5 +---- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 933af572c..26d5e276b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -5,8 +5,8 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/modifiers.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { @@ -21,28 +21,24 @@ enum class LocalScope : u64 { IR::MemoryScope LocalScopeToMemoryScope(LocalScope 
scope) { switch (scope) { case LocalScope::CTG: - return IR::MemoryScope::Warp; + return IR::MemoryScope::Workgroup; case LocalScope::GL: return IR::MemoryScope::Device; case LocalScope::SYS: return IR::MemoryScope::System; - case LocalScope::VC: - return IR::MemoryScope::Workgroup; // or should be device? default: throw NotImplementedException("Unimplemented Local Scope {}", scope); } } -} // namespace +} // Anonymous namespace void TranslatorVisitor::MEMBAR(u64 inst) { union { u64 raw; BitField<8, 2, LocalScope> scope; } membar{inst}; - IR::BarrierInstInfo info{}; - info.scope.Assign(LocalScopeToMemoryScope(membar.scope)); - ir.MemoryBarrier(info); + ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); } void TranslatorVisitor::DEPBAR() { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 731ac643f..7d9c42a83 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -96,8 +96,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: + // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_XY (Stubbed)"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: + // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_Z (Stubbed)"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 2acabb662..d508e1e23 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -50,10 +50,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void 
TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING("VOTE.VTG: Stubbed!"); - auto imm = ir.Imm1(false); - ir.SetFCSMFlag(imm); - ir.SetTRFlag(imm); + // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 80df541a0860eecc599f60a7b2955e1e286bc48a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:28:44 +0200 Subject: shader: "Implement" NOP --- .../frontend/maxwell/translate/impl/not_implemented.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 80a6ed578..acabb0118 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -183,7 +183,7 @@ void TranslatorVisitor::LONGJMP(u64) { } void TranslatorVisitor::NOP(u64) { - ThrowNotImplemented(Opcode::NOP); + // NOP is No-Op. 
} void TranslatorVisitor::OUT_reg(u64) { -- cgit v1.2.3 From ca7ebdc471cfd9549b15f8ae5523c6fdddca57e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:16:09 -0300 Subject: shader: Fix FADD32I --- .../frontend/maxwell/translate/impl/floating_point_add.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 487198aa6..b39950c84 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -68,17 +68,15 @@ void TranslatorVisitor::FADD32I(u64 insn) { union { u64 raw; BitField<55, 1, u64> ftz; - BitField<53, 1, u64> neg_b; + BitField<56, 1, u64> neg_a; BitField<54, 1, u64> abs_a; BitField<52, 1, u64> cc; - BitField<56, 1, u64> neg_a; + BitField<53, 1, u64> neg_b; BitField<57, 1, u64> abs_b; - BitField<50, 1, u64> sat; } const fadd32i{insn}; - FADD(*this, insn, fadd32i.sat != 0, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, - GetFloatImm32(insn), fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, - fadd32i.neg_b != 0); + FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), + fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); } } // namespace Shader::Maxwell -- cgit v1.2.3 From c4aab5c40ec1347da9811169bbc3dfb23632ab98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:17:08 -0300 Subject: shader: Fix fp16 merge when using native fp16 --- .../frontend/maxwell/translate/impl/half_floating_point_helper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp index d0c6ba1aa..0dbeb7f56 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -51,9 +51,9 @@ IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const I case Merge::MRG_H0: case Merge::MRG_H1: { const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; - const bool h0{merge == Merge::MRG_H0}; - const IR::F16& insert{h0 ? lhs : rhs}; - return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + const bool is_h0{merge == Merge::MRG_H0}; + const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); } } throw InvalidArgument("Invalid merge {}", merge); -- cgit v1.2.3 From 0b26f2b90ea4fe6097d982b72dfe38c0a3658ad0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 06:40:16 -0300 Subject: shader: Remove unused header in VOTE --- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index d508e1e23..0793611ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include - #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -- cgit v1.2.3 From 3f594dd86bd1ee1b178109132482c7d6b43e66dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 02:31:09 -0300 Subject: shader: Reimplement GetCbufU64 as GetCbufU32x2 It may generate better code on some compilers and it's easier to handle. --- .../frontend/maxwell/translate/impl/load_constant.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 49ccb7d62..ae3ecea32 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -30,25 +30,25 @@ void TranslatorVisitor::LDC(u64 insn) { const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; switch (ldc.size) { case Size::U8: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); break; case Size::S8: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); break; case Size::U16: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); break; case Size::S16: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); break; case Size::B32: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); break; case Size::B64: { if (!IR::IsAligned(ldc.dest_reg, 2)) { throw NotImplementedException("Unaligned destination register"); } - const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, 
offset, 64, false))}; + const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } -- cgit v1.2.3 From fc93bc2abde0b54a0a495f9b28a76fd34b47f320 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 03:04:48 -0300 Subject: shader: Implement BAR and fix memory barriers --- .../maxwell/translate/impl/barrier_operations.cpp | 58 +++++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 26d5e276b..2a2a294df 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -38,6 +38,7 @@ void TranslatorVisitor::MEMBAR(u64 inst) { u64 raw; BitField<8, 2, LocalScope> scope; } membar{inst}; + ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); } @@ -45,8 +46,61 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::BAR(u64) { - throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR); +void TranslatorVisitor::BAR(u64 insn) { + enum class Mode { + RedPopc, + Scan, + RedAnd, + RedOr, + Sync, + Arrive, + }; + union { + u64 raw; + BitField<43, 1, u64> is_a_imm; + BitField<44, 1, u64> is_b_imm; + BitField<8, 8, u64> imm_a; + BitField<20, 12, u64> imm_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, IR::Pred> pred; + } const bar{insn}; + + const Mode mode{[insn] { + switch (insn & 0x0000009B00000000ULL) { + case 0x0000000200000000ULL: + return Mode::RedPopc; + case 0x0000000300000000ULL: + return Mode::Scan; + case 0x0000000A00000000ULL: + return Mode::RedAnd; + case 0x0000001200000000ULL: + return Mode::RedOr; + case 0x0000008000000000ULL: + return Mode::Sync; + 
case 0x0000008100000000ULL: + return Mode::Arrive; + } + throw NotImplementedException("Invalid encoding"); + }()}; + if (mode != Mode::Sync) { + throw NotImplementedException("BAR mode {}", mode); + } + if (bar.is_a_imm == 0) { + throw NotImplementedException("Non-immediate input A"); + } + if (bar.imm_a != 0) { + throw NotImplementedException("Non-zero input A"); + } + if (bar.is_b_imm == 0) { + throw NotImplementedException("Non-immediate input B"); + } + if (bar.imm_b != 0) { + throw NotImplementedException("Non-zero input B"); + } + if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { + throw NotImplementedException("Non-true input predicate"); + } + ir.Barrier(); } } // namespace Shader::Maxwell -- cgit v1.2.3 From da6cf2632cd4dc0d2b0278353fcaee0789b418c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 05:17:17 -0300 Subject: shader: Add subgroup masks --- .../translate/impl/move_special_register.cpp | 110 ++++++++++++++------- 1 file changed, 75 insertions(+), 35 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 7d9c42a83..be1f21e7b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -10,6 +10,7 @@ namespace Shader::Maxwell { namespace { enum class SpecialRegister : u64 { SR_LANEID = 0, + SR_CLOCK = 1, SR_VIRTCFG = 2, SR_VIRTID = 3, SR_PM0 = 4, @@ -20,6 +21,9 @@ enum class SpecialRegister : u64 { SR_PM5 = 9, SR_PM6 = 10, SR_PM7 = 11, + SR12 = 12, + SR13 = 13, + SR14 = 14, SR_ORDERING_TICKET = 15, SR_PRIM_TYPE = 16, SR_INVOCATION_ID = 17, @@ -41,44 +45,70 @@ enum class SpecialRegister : u64 { SR_TID_X = 33, SR_TID_Y = 34, SR_TID_Z = 35, + SR_CTA_PARAM = 36, SR_CTAID_X = 37, SR_CTAID_Y = 38, SR_CTAID_Z = 39, - SR_NTID = 
49, - SR_CirQueueIncrMinusOne = 50, - SR_NLATC = 51, - SR_SWINLO = 57, - SR_SWINSZ = 58, - SR_SMEMSZ = 59, - SR_SMEMBANKS = 60, - SR_LWINLO = 61, - SR_LWINSZ = 62, - SR_LMEMLOSZ = 63, - SR_LMEMHIOFF = 64, - SR_EQMASK = 65, - SR_LTMASK = 66, - SR_LEMASK = 67, - SR_GTMASK = 68, - SR_GEMASK = 69, - SR_REGALLOC = 70, - SR_GLOBALERRORSTATUS = 73, - SR_WARPERRORSTATUS = 75, - SR_PM_HI0 = 81, - SR_PM_HI1 = 82, - SR_PM_HI2 = 83, - SR_PM_HI3 = 84, - SR_PM_HI4 = 85, - SR_PM_HI5 = 86, - SR_PM_HI6 = 87, - SR_PM_HI7 = 88, - SR_CLOCKLO = 89, - SR_CLOCKHI = 90, - SR_GLOBALTIMERLO = 91, - SR_GLOBALTIMERHI = 92, - SR_HWTASKID = 105, - SR_CIRCULARQUEUEENTRYINDEX = 106, - SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, - SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, + SR_NTID = 40, + SR_CirQueueIncrMinusOne = 41, + SR_NLATC = 42, + SR43 = 43, + SR_SM_SPA_VERSION = 44, + SR_MULTIPASSSHADERINFO = 45, + SR_LWINHI = 46, + SR_SWINHI = 47, + SR_SWINLO = 48, + SR_SWINSZ = 49, + SR_SMEMSZ = 50, + SR_SMEMBANKS = 51, + SR_LWINLO = 52, + SR_LWINSZ = 53, + SR_LMEMLOSZ = 54, + SR_LMEMHIOFF = 55, + SR_EQMASK = 56, + SR_LTMASK = 57, + SR_LEMASK = 58, + SR_GTMASK = 59, + SR_GEMASK = 60, + SR_REGALLOC = 61, + SR_BARRIERALLOC = 62, + SR63 = 63, + SR_GLOBALERRORSTATUS = 64, + SR65 = 65, + SR_WARPERRORSTATUS = 66, + SR_WARPERRORSTATUSCLEAR = 67, + SR68 = 68, + SR69 = 69, + SR70 = 70, + SR71 = 71, + SR_PM_HI0 = 72, + SR_PM_HI1 = 73, + SR_PM_HI2 = 74, + SR_PM_HI3 = 75, + SR_PM_HI4 = 76, + SR_PM_HI5 = 77, + SR_PM_HI6 = 78, + SR_PM_HI7 = 79, + SR_CLOCKLO = 80, + SR_CLOCKHI = 81, + SR_GLOBALTIMERLO = 82, + SR_GLOBALTIMERHI = 83, + SR84 = 84, + SR85 = 85, + SR86 = 86, + SR87 = 87, + SR88 = 88, + SR89 = 89, + SR90 = 90, + SR91 = 91, + SR92 = 92, + SR93 = 93, + SR94 = 94, + SR95 = 95, + SR_HWTASKID = 96, + SR_CIRCULARQUEUEENTRYINDEX = 97, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, }; [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { @@ -103,6 +133,16 @@ 
enum class SpecialRegister : u64 { return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); + case SpecialRegister::SR_EQMASK: + return ir.SubgroupEqMask(); + case SpecialRegister::SR_LTMASK: + return ir.SubgroupLtMask(); + case SpecialRegister::SR_LEMASK: + return ir.SubgroupLeMask(); + case SpecialRegister::SR_GTMASK: + return ir.SubgroupGtMask(); + case SpecialRegister::SR_GEMASK: + return ir.SubgroupGeMask(); default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From ffca21487f9728015a2c036fa581ead7d3d074d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 05:18:09 -0300 Subject: shader: Eliminate orphan blocks more efficiently --- src/shader_recompiler/frontend/maxwell/program.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 7b08f11b0..05b7591bc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -14,20 +14,20 @@ #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { - -static void RemoveUnreachableBlocks(IR::Program& program) { +namespace { +void RemoveUnreachableBlocks(IR::Program& program) { // Some blocks might be unreachable if a function call exists unconditionally // If this happens the number of blocks and post order blocks will mismatch if (program.blocks.size() == program.post_order_blocks.size()) { return; } - const IR::BlockList& post_order{program.post_order_blocks}; - std::erase_if(program.blocks, [&](IR::Block* block) { - return std::ranges::find(post_order, block) == post_order.end(); - }); + const auto begin{std::next(program.blocks.begin())}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return 
block->ImmediatePredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); } -static void CollectInterpolationInfo(Environment& env, IR::Program& program) { +void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (program.stage != Stage::Fragment) { return; } @@ -60,6 +60,7 @@ static void CollectInterpolationInfo(Environment& env, IR::Program& program) { }(); } } +} // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { -- cgit v1.2.3 From 9e6fe430bdc615ae5f7cc4fbc32d7e2baccd7ceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 20:00:34 -0300 Subject: shader: Fix splits on blocks using indirect branches --- .../frontend/maxwell/control_flow.cpp | 35 ++++++++++++++++++---- .../frontend/maxwell/control_flow.h | 18 +++++------ src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- 3 files changed, 38 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index ac8707847..eb0f7c8d1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -45,19 +45,29 @@ void Split(Block* old_block, Block* new_block, Location pc) { .begin{pc}, .end{old_block->end}, .end_class{old_block->end_class}, - .stack{old_block->stack}, .cond{old_block->cond}, + .stack{old_block->stack}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, + .function_call{old_block->function_call}, + .return_block{old_block->return_block}, + .branch_reg{old_block->branch_reg}, + .branch_offset{old_block->branch_offset}, + .indirect_branches{std::move(old_block->indirect_branches)}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, - .stack{std::move(old_block->stack)}, 
.cond{true}, + .stack{std::move(old_block->stack)}, .branch_true{new_block}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, }; } @@ -173,10 +183,15 @@ Function::Function(ObjectPool& block_pool, Location start_address) .begin{start_address}, .end{start_address}, .end_class{EndClass::Branch}, - .stack{}, .cond{true}, + .stack{}, .branch_true{nullptr}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, })}, .stack{}, }} {} @@ -351,10 +366,15 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .begin{block->begin.Virtual()}, .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .stack{block->stack}, .cond{cond}, + .stack{block->stack}, .branch_true{conditional_block}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -502,10 +522,15 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .begin{pc}, .end{pc}, .end_class{EndClass::Branch}, - .stack{stack}, .cond{true}, + .stack{stack}, .branch_true{nullptr}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, })}; function.labels.push_back(Label{ .address{pc}, diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index a8c90d27a..466b14198 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -79,18 +79,14 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; EndClass end_class; - Stack stack; IR::Condition cond; - union { - Block* branch_true; - FunctionId function_call; - IR::Reg 
branch_reg; - }; - union { - Block* branch_false; - Block* return_block; - s32 branch_offset; - }; + Stack stack; + Block* branch_true; + Block* branch_false; + FunctionId function_call; + Block* return_block; + IR::Reg branch_reg; + s32 branch_offset; std::vector indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 05b7591bc..58caa35a1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -21,7 +21,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { if (program.blocks.size() == program.post_order_blocks.size()) { return; } - const auto begin{std::next(program.blocks.begin())}; + const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); -- cgit v1.2.3 From 20ba0ea0a94fa915cad6392b3742d8e58e2fa0d9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 6 Apr 2021 02:01:01 +0200 Subject: shader: Fix BRX tracking --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 6 +++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index eb0f7c8d1..1a4ee4f6c 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -197,7 +197,7 @@ Function::Function(ObjectPool& block_pool, Location start_address) }} {} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_} { + : env{env_}, block_pool{block_pool_}, program_start{start_address} { 
functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -427,9 +427,9 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, FunctionId function_id) { - const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; if (!brx_table) { - TrackIndirectBranchTable(env, pc, block->begin); + TrackIndirectBranchTable(env, pc, program_start); throw NotImplementedException("Failed to track indirect branch"); } const IR::FlowTest flow_test{inst.branch.flow_test}; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 466b14198..9f570fbb5 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -157,6 +157,7 @@ private: ObjectPool& block_pool; boost::container::small_vector functions; FunctionId current_function_id{0}; + Location program_start; }; } // namespace Shader::Maxwell::Flow -- cgit v1.2.3 From 0df7e509db060693ee1f131bae44045db995c3bd Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 4 Apr 2021 02:42:58 +0200 Subject: shader: Implement AL2P --- .../impl/attribute_memory_to_physical.cpp | 35 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 4 --- 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file 
mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class BitSize : u64 { + B32, + B64, + B96, + B128, +}; + +void TranslatorVisitor::AL2P(u64 inst) { + union { + u64 raw; + BitField<0, 8, IR::Reg> result_register; + BitField<8, 8, IR::Reg> indexing_register; + BitField<20, 11, s64> offset; + BitField<47, 2, BitSize> bitsize; + } al2p{inst}; + if (al2p.bitsize != BitSize::B32) { + throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); + } + const IR::U32 converted_offset{ir.Imm32(static_cast(al2p.offset.Value()))}; + const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; + X(al2p.result_register, result); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index acabb0118..ba0cfa673 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -13,10 +13,6 @@ namespace Shader::Maxwell { throw NotImplementedException("Instruction {} is not implemented", opcode); } -void TranslatorVisitor::AL2P(u64) { - ThrowNotImplemented(Opcode::AL2P); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -- cgit v1.2.3 From 1d51803169f72f79e19995072fb9e8a371dbdcbf Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 4 Apr 2021 06:47:14 +0200 Subject: shader: Implement indexed attributes --- 
src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 38 ++++++++++++++-------- 2 files changed, 26 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 58caa35a1..aaf2a74a7 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -87,7 +87,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Mon, 5 Apr 2021 04:03:12 +0200 Subject: shader: Address feedback --- .../translate/impl/load_store_attribute.cpp | 37 ++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 0d248c020..f629e7167 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -44,6 +44,17 @@ u32 NumElements(Size size) { } throw InvalidArgument("Invalid size {}", size); } + +template +void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { + const IR::U32 index_value{v.X(index_reg)}; + for (u32 element = 0; element < num_elements; ++element) { + const IR::U32 final_offset{ + element == 0 ? 
index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; + f(element, final_offset); + } +} + } // Anonymous namespace void TranslatorVisitor::ALD(u64 insn) { @@ -70,18 +81,15 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Unaligned absolute offset {}", offset); } const u32 num_elements{NumElements(ald.size)}; - if (ald.index_reg != IR::Reg::RZ) { - const IR::U32 index_value = X(ald.index_reg); + if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::U32 final_offset = - element == 0 ? index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))}; - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); } return; } - for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); - } + HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + }); } void TranslatorVisitor::AST(u64 insn) { @@ -110,18 +118,15 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Unaligned absolute offset {}", offset); } const u32 num_elements{NumElements(ast.size)}; - if (ast.index_reg != IR::Reg::RZ) { - const IR::U32 index_value = X(ast.index_reg); + if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::U32 final_offset = - element == 0 ? 
index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))}; - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); } return; } - for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); - } + HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + }); } void TranslatorVisitor::IPA(u64 insn) { -- cgit v1.2.3 From 56b92bd89cdf28f51277d6fc68115b2cd4b18864 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 05:53:38 -0300 Subject: shader: Fix F2I --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ef55b9c75..21ae92be1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -37,7 +37,7 @@ union F2I { BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; BitField<39, 2, Rounding> rounding; - BitField<49, 1, u64> half; + BitField<41, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; BitField<47, 1, u64> cc; -- cgit v1.2.3 From 233e39bb7b9ca7660c7a63a386e285aa5524bd20 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Apr 2021 16:48:39 -0300 Subject: shader: Fix dangling labels --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1a4ee4f6c..847bb1986 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -518,6 +518,11 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } return &*it; } + // Make sure we don't insert the same layer twice + const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; + if (label_it != function.labels.end()) { + return label_it->block; + } Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, -- cgit v1.2.3 From 5cd3d00167b17c1fe36f97da978a7024e93c14e7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 23:41:27 -0300 Subject: shader: Fix FCMP immediate variant --- .../frontend/maxwell/translate/impl/floating_point_compare.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index e78e9c4e1..c02a40209 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -42,7 +42,15 @@ void TranslatorVisitor::FCMP_cr(u64 insn) { } void TranslatorVisitor::FCMP_imm(u64 insn) { - FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const fcmp{insn}; + const u32 sign_bit{fcmp.is_negative != 0 ? 
(1U << 31) : 0}; + const u32 value{static_cast(fcmp.value) << 12}; + + FCMP(*this, insn, ir.Imm32(value), GetFloatReg39(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- .../frontend/maxwell/control_flow.cpp | 140 +++++++++------------ src/shader_recompiler/frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../frontend/maxwell/structured_control_flow.cpp | 3 +- .../frontend/maxwell/translate/impl/double_add.cpp | 6 +- .../translate/impl/double_fused_multiply_add.cpp | 6 +- .../maxwell/translate/impl/double_multiply.cpp | 6 +- .../maxwell/translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- .../floating_point_conversion_floating_point.cpp | 6 +- .../impl/floating_point_conversion_integer.cpp | 11 +- .../impl/floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 +- .../translate/impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../translate/impl/floating_point_swizzled_add.cpp | 6 +- .../translate/impl/half_floating_point_add.cpp | 11 +- .../half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../translate/impl/half_floating_point_set.cpp | 11 +- 
.../impl/half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../impl/integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../translate/impl/load_store_local_shared.cpp | 9 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../translate/impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../maxwell/translate/impl/video_set_predicate.cpp | 1 - 34 files changed, 160 insertions(+), 180 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb1986..cb8ec7eaa 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - 
.end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& 
label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? static_cast(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // 
Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e..932d19c1d 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return 
MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if ((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d..008625cb3 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e..02cef2645 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea..5a1b3a8fc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff7321862..723841496 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ 
void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95..4a49299a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c84..b8c89810c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209..80109ca0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e..b9f4ee0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz 
!= 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde0..035f8782a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1..cf3cf1ba6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c..fa2a7807b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - 
.rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032..8ae437528 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae..06226b7ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db843..5f93a1513 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - 
.fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a21..7550a8d4c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047..f2738a93b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hadd2.low << 6) | static_cast((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | static_cast((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6a..fd7986701 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 
1 : 0) << 31)}; + const u32 imm{ + static_cast(hfma2.low << 6) | static_cast((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hfma2.high << 22) | static_cast((hfma2.neg_high != 0 ? 1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef6..3f548ce76 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hmul2.low << 6) | static_cast((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hmul2.high << 22) | static_cast((hmul2.neg_high != 0 ? 
1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c..cca5b831f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hset2.low << 6) | static_cast((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | static_cast((hset2.neg_high != 0 ? 
1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92..b3931dae3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast(hsetp2.low << 6) | + static_cast((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | + static_cast((hsetp2.neg_high != 0 ? 
1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4..88bbac0a5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast(imm.is_negative != 0 ? 
(1ULL << 31) : 0)}; const u32 value{static_cast(imm.value) << 12}; return ir.Imm32(Common::BitCast(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e1815..8ffd84867 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2..5a0fc36a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea32..2300088e3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void 
TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea..e24b49721 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d7..36c5cff2f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9..95d416586 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? 
IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f4..fe2c7db85 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad..2ba9c1018 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23..0863bdfcd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> 
encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bca..0459e5473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fcc..ec5e74f6d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 
0U : static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; -- cgit v1.2.3 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 4 ++-- src/shader_recompiler/frontend/maxwell/control_flow.h | 16 ++++++++-------- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- .../frontend/maxwell/translate/impl/common_funcs.cpp | 5 +++-- .../frontend/maxwell/translate/impl/not_implemented.cpp | 1 - 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index cb8ec7eaa..9811183f1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -44,7 +44,7 @@ void Split(Block* old_block, Block* new_block, Location pc) { *new_block = Block{}; new_block->begin = pc; new_block->end = old_block->end; - new_block->end_class = old_block->end_class, + new_block->end_class = old_block->end_class; new_block->cond = old_block->cond; new_block->stack = old_block->stack; new_block->branch_true = old_block->branch_true; @@ -428,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += static_cast(brx_table->branch_offset); + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 9f570fbb5..89966b16a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ 
b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -78,15 +78,15 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; - EndClass end_class; - IR::Condition cond; + EndClass end_class{}; + IR::Condition cond{}; Stack stack; - Block* branch_true; - Block* branch_false; - FunctionId function_call; - Block* return_block; - IR::Reg branch_reg; - s32 branch_offset; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; std::vector indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index 932d19c1d..972f677dc 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode = Opcode::name, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index d30e82b10..10bb01d99 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,8 +72,9 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, - FPCompareOp compare_op, IR::FpControl control) { +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { case FPCompareOp::F: diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba0cfa673..c23901052 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,7 +65,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } -- cgit v1.2.3 From 094da34456bbf56353211b47fcb227c09637aa15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 03:44:05 -0300 Subject: shader: Fix Windows build issues --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index cf3cf1ba6..3cb896950 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -193,7 +193,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; } } -- cgit v1.2.3 From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: shader: Implement SULD and SUST --- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/surface_load_store.cpp | 280 +++++++++++++++++++++ 
.../maxwell/translate/impl/texture_fetch.cpp | 19 +- .../translate/impl/texture_fetch_swizzled.cpp | 12 +- .../maxwell/translate/impl/texture_gather.cpp | 19 +- .../translate/impl/texture_gather_swizzled.cpp | 3 +- .../maxwell/translate/impl/texture_gradient.cpp | 18 +- .../maxwell/translate/impl/texture_load.cpp | 18 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../translate/impl/texture_mipmap_level.cpp | 18 +- 10 files changed, 338 insertions(+), 59 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c23901052..327941223 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -281,18 +281,10 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SULD(u64) { - ThrowNotImplemented(Opcode::SULD); -} - void TranslatorVisitor::SURED(u64) { ThrowNotImplemented(Opcode::SURED); } -void TranslatorVisitor::SUST(u64) { - ThrowNotImplemented(Opcode::SUST); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..9a2d16a6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,280 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +enum class LoadCache : u64 { + Default, + CG, + CI, + CV, +}; + +enum class StoreCache : u64 { + Default, + CG, + CS, + WT, +}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case 
Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array colors; + for (int i = 0; i < num_regs; ++i) { + colors[i] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[i] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if 
(suld.cache != LoadCache::Default) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if (is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 
4 : bits)) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::Default) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? 
ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 95d416586..9671d115e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -33,24 +33,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? 
Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -169,7 +169,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); info.has_lod_clamp.Assign(lc ? 1 : 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index fe2c7db85..3500a4559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -95,18 +95,21 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { {}, info); case 4: // 2D.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, {}, {}, info); case 5: // 2D.LL.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b + 1), v.F(reg_b), {}, {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, 
reg_a + 1), v.F(reg_b), zero, {}, {}, info); case 7: // ARRAY_2D @@ -124,7 +127,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::ShadowArray2D); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), v.F(reg_b + 1), zero, {}, {}, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index b2f9cda46..218cbc1a8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -37,24 +37,24 @@ enum class ComponentType : u64 { A = 3, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? 
Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -163,7 +163,8 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); info.gather_component.Assign(static_cast(component_type)); const IR::Value sample{[&] { if (tld4.dc == 0) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index 2ba9c1018..34efa2d50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -59,7 +59,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.relaxed_precision.Assign(1); } info.gather_component.Assign(static_cast(tld4s.component_type.Value())); - info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 
1 : 0); IR::Value coords; if (tld4s.aoffi != 0) { CheckAlignment(reg_a, 2); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index c66468a48..c3fe3ffda 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? 
Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -152,7 +152,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; - info.type.Assign(GetType(txd.type, false)); + info.type.Assign(GetType(txd.type)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index 987b7ec34..983058303 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? 
Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -137,7 +137,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { throw NotImplementedException("TLD.CL - CLAMP is not implmented"); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld.type, false)); + info.type.Assign(GetType(tld.type)); const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 0863bdfcd..5dd7e31b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include "common/bit_field.h" #include "common/common_types.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index b6efc04f0..2277d24ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? 
Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -97,7 +97,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tmml.type, false)); + info.type.Assign(GetType(tmml.type)); const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; IR::Reg dest_reg{tmml.dest_reg}; -- cgit v1.2.3 From 8cea39b5a63b350f7c6334b91b9d7e2b30bd61bf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 03:52:49 -0300 Subject: shader: Remove outdated comment in F2I --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 3cb896950..92b1ce015 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -142,10 +142,6 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } }()}; - - // TODO: Handle out of bounds conversions. - // For example converting F32 65537.0 to U16, the expected value is 0xffff, - const bool is_signed{f2i.is_signed != 0}; const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); -- cgit v1.2.3 From 1be6705408d1a3454146c705fae3dc55031e966e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 10 Apr 2021 00:29:12 +0200 Subject: shader: Implement CC for ISET, FSET, PSET, CSET, and DSET Throw when other instructions are missing CC. 
--- .../maxwell/translate/impl/bitfield_extract.cpp | 5 +++++ .../frontend/maxwell/translate/impl/bitfield_insert.cpp | 5 +++++ .../maxwell/translate/impl/condition_code_set.cpp | 16 ++++++++++++++-- .../maxwell/translate/impl/double_compare_and_set.cpp | 17 +++++++++++++++-- .../translate/impl/double_fused_multiply_add.cpp | 5 +++++ .../frontend/maxwell/translate/impl/double_min_max.cpp | 5 +++++ .../frontend/maxwell/translate/impl/double_multiply.cpp | 5 +++++ .../translate/impl/floating_point_compare_and_set.cpp | 17 +++++++++++++++-- .../impl/floating_point_conversion_floating_point.cpp | 5 +++++ .../maxwell/translate/impl/floating_point_min_max.cpp | 5 +++++ .../maxwell/translate/impl/integer_compare_and_set.cpp | 17 ++++++++++++++--- .../impl/integer_floating_point_conversion.cpp | 2 +- .../maxwell/translate/impl/integer_funnel_shift.cpp | 5 +++++ .../maxwell/translate/impl/integer_minimum_maximum.cpp | 5 +++++ .../maxwell/translate/impl/integer_shift_right.cpp | 4 ++++ .../maxwell/translate/impl/load_effective_address.cpp | 10 +++++++++- .../translate/impl/logic_operation_three_input.cpp | 5 +++++ .../maxwell/translate/impl/predicate_set_register.cpp | 16 ++++++++++++++-- 18 files changed, 136 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp index 4a03e6939..0738ae7a6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -14,9 +14,14 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> offset_reg; BitField<40, 1, u64> brev; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const bfe{insn}; + if (bfe.cc != 0) { + throw NotImplementedException("BFE CC"); + } + const 
IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp index ee312c30d..fb7f821e6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -13,8 +13,13 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> insert_reg; + BitField<47, 1, u64> cc; } const bfi{insn}; + if (bfi.cc != 0) { + throw NotImplementedException("BFI CC"); + } + const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; const IR::U32 max_size{v.ir.Imm32(32)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp index ea0c40a54..420f2fb94 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -18,17 +18,29 @@ void TranslatorVisitor::CSET(u64 insn) { BitField<42, 1, u64> neg_bop_pred; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; } const cset{insn}; const IR::U32 one_mask{ir.Imm32(-1)}; const IR::U32 fp_one{ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{ir.Imm32(0)}; + const IR::U32 zero{ir.Imm32(0)}; const IR::U32 pass_result{cset.bf == 0 ? 
one_mask : fp_one}; const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; - const IR::U32 result{ir.Select(pred_result, pass_result, fail_result)}; + const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; X(cset.dest_reg, result); + if (cset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (cset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } } void TranslatorVisitor::CSETP(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp index e2ec852c9..1173192e4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -19,6 +19,7 @@ void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_b; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 4, FPCompareOp> compare_op; BitField<52, 1, u64> bf; BitField<53, 1, u64> negate_b; @@ -37,10 +38,22 @@ void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{dset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; - v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); + v.X(dset.dest_reg, result); + if (dset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (dset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index 723841496..f66097014 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -16,10 +16,15 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_a_reg; BitField<50, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<49, 1, u64> neg_c; } const dfma{insn}; + if (dfma.cc != 0) { + throw NotImplementedException("DFMA CC"); + } + const IR::F64 src_a{v.D(dfma.src_a_reg)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp index 55a224db3..6b551847c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -17,10 +17,15 @@ void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<42, 1, u64> neg_pred; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; BitField<48, 1, u64> 
negate_a; BitField<49, 1, u64> abs_b; } const dmnmx{insn}; + if (dmnmx.cc != 0) { + throw NotImplementedException("DMNMX CC"); + } + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 4a49299a0..c0159fb65 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -16,9 +16,14 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_a_reg; BitField<39, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; BitField<48, 1, u64> neg; } const dmul{insn}; + if (dmul.cc != 0) { + throw NotImplementedException("DMUL CC"); + } + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ .no_contraction = true, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index b9f4ee0d9..eece4f28f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -19,6 +19,7 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_b; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 4, FPCompareOp> compare_op; BitField<52, 1, u64> bf; BitField<53, 1, u64> negate_b; @@ -43,10 +44,22 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U32 
one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; - v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); + v.X(fset.dest_reg, result); + if (fset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (fset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 035f8782a..ce2cf470d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -41,6 +41,7 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { BitField<0, 8, IR::Reg> dest_reg; BitField<44, 1, u64> ftz; BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; BitField<50, 1, u64> sat; BitField<39, 4, u64> rounding_op; BitField<39, 2, FpRounding> rounding; @@ -53,6 +54,10 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { } } const f2f{insn}; + if (f2f.cc != 0) { + throw NotImplementedException("F2F CC"); + } + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 8ae437528..c0d6ee5af 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -18,10 +18,15 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<44, 1, u64> ftz; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; } const fmnmx{insn}; + if (fmnmx.cc) { + throw NotImplementedException("FMNMX CC"); + } + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 914af010f..a2cd8d7c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -19,6 +19,7 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<43, 1, u64> x; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const iset{insn}; @@ -38,12 +39,22 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{iset.bf == 0 ? 
one_mask : fp_one}; - - const IR::U32 result{v.ir.Select(bop_result, pass_result, fail_result)}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; v.X(iset.dest_reg, result); + if (iset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (iset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index 5a0fc36a0..3c233597f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -62,7 +62,7 @@ IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { const Encoding i2f{insn}; if (i2f.cc != 0) { - throw NotImplementedException("CC"); + throw NotImplementedException("I2F CC"); } const bool is_signed{i2f.is_signed != 0}; int src_bitsize{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp index d8d6c939e..5feefc0ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -33,9 +33,14 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> lo_bits_reg; BitField<37, 2, MaxShift> max_shift; + BitField<47, 1, u64> cc; BitField<48, 2, u64> x_mode; BitField<50, 1, u64> wrap; } const shf{insn}; + + if (shf.cc != 0) { + throw NotImplementedException("SHF CC"); + } if (shf.x_mode != 
0) { throw NotImplementedException("SHF X Mode"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 40f14ab8a..1badbacc4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -16,9 +16,14 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<39, 3, IR::Pred> pred; BitField<42, 1, u64> neg_pred; BitField<43, 2, u64> mode; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const imnmx{insn}; + if (imnmx.cc != 0) { + throw NotImplementedException("IMNMX CC"); + } + if (imnmx.mode != 0) { throw NotImplementedException("IMNMX.MODE"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp index 4025b1358..be00bb605 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -16,12 +16,16 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { BitField<39, 1, u64> is_wrapped; BitField<40, 1, u64> brev; BitField<43, 1, u64> xmode; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const shr{insn}; if (shr.xmode != 0) { throw NotImplementedException("SHR.XMODE"); } + if (shr.cc != 0) { + throw NotImplementedException("SHR.CC"); + } IR::U32 base{v.X(shr.src_reg_a)}; if (shr.brev == 1) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp index 784588e83..4a0f04e47 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -14,6 +14,7 @@ void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_ u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<47, 1, u64> cc; BitField<48, 3, IR::Pred> pred; } const lea{insn}; @@ -21,7 +22,10 @@ void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_ throw NotImplementedException("LEA.HI X"); } if (lea.pred != IR::Pred::PT) { - throw NotImplementedException("LEA.LO Pred"); + throw NotImplementedException("LEA.HI Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.HI CC"); } const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; @@ -44,6 +48,7 @@ void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { BitField<39, 5, u64> scale; BitField<45, 1, u64> neg; BitField<46, 1, u64> x; + BitField<47, 1, u64> cc; BitField<48, 3, IR::Pred> pred; } const lea{insn}; if (lea.x != 0) { @@ -52,6 +57,9 @@ void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { if (lea.pred != IR::Pred::PT) { throw NotImplementedException("LEA.LO Pred"); } + if (lea.cc != 0) { + throw NotImplementedException("LEA.LO CC"); + } const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; const s32 scale{static_cast<s32>(lea.scale)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp index 256c47504..e0fe47912 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -73,8 +73,13 @@ IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> cc; } const lop3{insn}; + if (lop3.cc != 0) { + throw
NotImplementedException("LOP3 CC"); + } + const IR::U32 op_a{v.X(lop3.src_reg)}; const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; v.X(lop3.dest_reg, result); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp index 6c15963fa..b02789874 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -21,6 +21,7 @@ void TranslatorVisitor::PSET(u64 insn) { BitField<42, 1, u64> neg_pred_c; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop_2; + BitField<47, 1, u64> cc; } const pset{insn}; const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; @@ -31,11 +32,22 @@ void TranslatorVisitor::PSET(u64 insn) { const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; - const IR::U32 false_result{ir.Imm32(0)}; + const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + const IR::U32 result{ir.Select(res_2, true_result, zero)}; X(pset.dest_reg, result); + if (pset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (pset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } } } // namespace Shader::Maxwell -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- .../impl/atomic_operations_global_memory.cpp | 222 +++++++++++++++++++++ .../impl/atomic_operations_shared_memory.cpp | 110 ++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- 3 files changed, 332 insertions(+), 12 deletions(-) create mode 
100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,222 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& 
ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::DontCare}, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize
size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + + const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; + const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; + const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + IR::Value result; + + if (AtomOpNotApplicable(atom.size, atom.op)) { + result = LoadGlobal(ir, offset, atom.size); + } else if (!is_integer) { + if (atom.size == AtomSize::F32) { + result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; + result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); + } + } else if (size_64) { + result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); + } else { + result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); + } + StoreResult(*this, atom.dest_reg, result, atom.size); +} + +void 
TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg_b; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + + if (AtomOpNotApplicable(red.size, red.op)) { + return; + } + const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; + const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; + const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + if (!is_integer) { + if (red.size == AtomSize::F32) { + ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; + ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); + } + } else if (size_64) { + ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); + } else { + ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void
TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -void TranslatorVisitor::ATOM(u64) { - ThrowNotImplemented(Opcode::ATOM); -} - void TranslatorVisitor::ATOMS_cas(u64) { ThrowNotImplemented(Opcode::ATOMS_cas); } -void TranslatorVisitor::ATOMS(u64) { - ThrowNotImplemented(Opcode::ATOMS); -} - void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -void TranslatorVisitor::RED(u64) { - ThrowNotImplemented(Opcode::RED); -} - void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } -- cgit v1.2.3 From c9337a4ae45639c0d5b6c83c30d098878f3c344a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 04:14:55 
-0300 Subject: shader: Apply sign bit in FCMP (imm) --- .../frontend/maxwell/translate/impl/floating_point_compare.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index 80109ca0e..7127ebf54 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::FCMP_imm(u64 insn) { const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; const u32 value{static_cast<u32>(fcmp.value) << 12}; - FCMP(*this, insn, ir.Imm32(value), GetFloatReg39(insn)); + FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 5c61e860e4f83524ffce10ca447398e83de81640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 19:16:12 -0300 Subject: shader: Implement SR_THREAD_KILL --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index be1f21e7b..50650cc56 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_THREAD_KILL: + return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_TID_X: return
ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: -- cgit v1.2.3 From 2ed80f6b1e85823d7a13dfbb119545a0a0ec7427 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 19:16:47 -0300 Subject: shader: Implement LOP CC --- .../frontend/maxwell/translate/impl/logic_operation.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp index 89e5cd6de..92cd27ed4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -44,9 +44,6 @@ void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv if (x) { throw NotImplementedException("X"); } - if (cc) { - throw NotImplementedException("CC"); - } IR::U32 op_a{v.X(lop.src_reg)}; if (inv_a != 0) { op_a = v.ir.BitwiseNot(op_a); @@ -60,6 +57,17 @@ void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; v.ir.SetPred(dest_pred, pred_result); } + if (cc) { + if (bit_op == LogicalOp::PASS_B) { + v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); + v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); + } else { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } v.X(lop.dest_reg, result); } -- cgit v1.2.3 From dfd5341d7117e4299b6c34e8b1feb0e66c230478 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:40:00 -0300 Subject: shader: Mark blocks with no end branch as unreachable --- .../frontend/maxwell/structured_control_flow.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 02cef2645..e63e25aa6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -816,8 +816,13 @@ private: throw NotImplementedException("Statement type {}", stmt.type); } } - if (current_block && continue_block) { - IR::IREmitter{*current_block}.Branch(continue_block); + if (current_block) { + IR::IREmitter ir{*current_block}; + if (continue_block) { + ir.Branch(continue_block); + } else { + ir.Unreachable(); + } } } -- cgit v1.2.3 From 415c7e46ed2f00bb4611cf2913eac1b92ca130bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:54:51 -0300 Subject: shader: Simplify FLO and throw on CC --- .../maxwell/translate/impl/find_leading_one.cpp | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp index d5361bec5..f0cb25d61 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -8,26 +8,27 @@ namespace Shader::Maxwell { namespace { -void FLO(TranslatorVisitor& v, u64 insn, const IR::U32& src) { +void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<40, 1, u64> tilde; BitField<41, 1, u64> shift; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const flo{insn}; - const bool invert{flo.tilde != 0}; - const bool is_signed{flo.is_signed != 0}; - const bool shift_op{flo.shift != 0}; - - const IR::U32 operand{invert ? v.ir.BitwiseNot(src) : src}; - const IR::U32 find_result{is_signed ? 
v.ir.FindSMsb(operand) : v.ir.FindUMsb(operand)}; - const IR::U1 find_fail{v.ir.IEqual(find_result, v.ir.Imm32(-1))}; - const IR::U32 offset{v.ir.Imm32(31)}; - const IR::U32 success_result{shift_op ? IR::U32{v.ir.ISub(offset, find_result)} : find_result}; - - const IR::U32 result{v.ir.Select(find_fail, find_result, success_result)}; + if (flo.cc != 0) { + throw NotImplementedException("CC"); + } + if (flo.tilde != 0) { + src = v.ir.BitwiseNot(src); + } + IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; + if (flo.shift != 0) { + const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; + result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; + } v.X(flo.dest_reg, result); } } // Anonymous namespace -- cgit v1.2.3 From 2516829e4cfa30378ce049a8c66dee9b3482d673 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 21:15:44 -0300 Subject: shader: Fix CC in I2I --- .../frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index 2f1a58805..53e8d8923 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -105,6 +105,8 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (i2i.cc != 0) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); + v.ResetCFlag(); + v.ResetOFlag(); } } } // Anonymous namespace -- cgit v1.2.3 From f71208414775a6fca87130d2defcdeba75314084 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 21:33:18 -0300 Subject: shader: Stub ISBERD --- .../impl/internal_stage_buffer_entry_read.cpp 
| 55 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 4 -- 2 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..8c01b7d30 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +#pragma optimize("", off) + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +} // Anonymous namespace + +void TranslatorVisitor::ISBERD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<31, 1, u64> skew; + BitField<32, 1, u64> o; + BitField<33, 2, Mode> mode; + BitField<47, 2, Shift> shift; + } const isberd{insn}; + + if (isberd.skew != 0) { + throw NotImplementedException("SKEW"); + } + if (isberd.o != 0) { + throw NotImplementedException("O"); + } + if (isberd.mode != Mode::Default) { + throw NotImplementedException("Mode {}", isberd.mode.Value()); + } + if (isberd.shift != Shift::Default) { + throw NotImplementedException("Shift {}", isberd.shift.Value()); + } + // LOG_WARNING(..., "ISBERD is stubbed"); + X(isberd.dest_reg, X(isberd.src_reg)); +} + +} // namespace Shader::Maxwell diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index aebe3072a..694bdfccb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -141,10 +141,6 @@ void TranslatorVisitor::IMUL32I(u64) { ThrowNotImplemented(Opcode::IMUL32I); } -void TranslatorVisitor::ISBERD(u64) { - ThrowNotImplemented(Opcode::ISBERD); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } -- cgit v1.2.3 From 4b0172f6debf9ba595d5fd2d3e2329328513f5db Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 21:33:41 -0300 Subject: shader: Stub SR_INVOCATION_INFO --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 50650cc56..bc822d585 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -115,6 +115,9 @@ enum class SpecialRegister : u64 { switch (special_register) { case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; + case SpecialRegister::SR_INVOCATION_INFO: + // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); + return ir.Imm32(0x00ff'0000); case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: @@ -128,10 +131,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: - // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_XY 
(Stubbed)"); + // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed"); return ir.Imm32(Common::BitCast<u32>(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: - // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_Z (Stubbed)"); + // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed"); return ir.Imm32(Common::BitCast<u32>(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); -- cgit v1.2.3 From dd3432d357ce0bdf8bb295094c89bf659c939259 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Apr 2021 23:11:24 -0400 Subject: internal_stage_buffer_entry_read: Remove pragma optimize off --- .../maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index 8c01b7d30..edd6220a8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -6,8 +6,6 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#pragma optimize("", off) - namespace Shader::Maxwell { namespace { enum class Mode : u64 { -- cgit v1.2.3 From a6cef71cc0b03f929f1bc97152b302562f46bc53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 03:48:15 -0300 Subject: shader: Implement OUT --- .../translate/impl/load_store_attribute.cpp | 7 +--- .../maxwell/translate/impl/not_implemented.cpp | 12 ------ .../maxwell/translate/impl/output_geometry.cpp | 45 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git
a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index f629e7167..79293bd6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) { BitField<8, 8, IR::Reg> index_reg; BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; - BitField<39, 8, IR::Reg> stream_reg; + BitField<39, 8, IR::Reg> array_reg; BitField<32, 1, u64> o; BitField<31, 1, u64> patch; BitField<47, 2, Size> size; @@ -100,16 +100,13 @@ void TranslatorVisitor::AST(u64 insn) { BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; BitField<31, 1, u64> patch; - BitField<39, 8, IR::Reg> stream_reg; + BitField<39, 8, IR::Reg> array_reg; BitField<47, 2, Size> size; } const ast{insn}; if (ast.patch != 0) { throw NotImplementedException("P"); } - if (ast.stream_reg != IR::Reg::RZ) { - throw NotImplementedException("Stream store"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 694bdfccb..a45d1e4be 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -169,18 +169,6 @@ void TranslatorVisitor::NOP(u64) { // NOP is No-Op. 
} -void TranslatorVisitor::OUT_reg(u64) { - ThrowNotImplemented(Opcode::OUT_reg); -} - -void TranslatorVisitor::OUT_cbuf(u64) { - ThrowNotImplemented(Opcode::OUT_cbuf); -} - -void TranslatorVisitor::OUT_imm(u64) { - ThrowNotImplemented(Opcode::OUT_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> output_reg; // Not needed on host + BitField<39, 1, u64> emit; + BitField<40, 1, u64> cut; + } const out{insn}; + + stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); + + if (out.emit != 0) { + v.ir.EmitVertex(stream_index); + } + if (out.cut != 0) { + v.ir.EndPrimitive(stream_index); + } + // Host doesn't need the output register, but we can write to it to avoid undefined reads + v.X(out.dest_reg, v.ir.Imm32(0)); +} +} // Anonymous namespace + +void TranslatorVisitor::OUT_reg(u64 insn) { + OUT(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::OUT_cbuf(u64 insn) { + OUT(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::OUT_imm(u64 insn) { + OUT(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From f263760c5a3aff771123b32b15677e1f7a089640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:22 -0300 Subject: 
shader: Implement geometry shaders --- src/shader_recompiler/frontend/maxwell/program.cpp | 13 ++++++++++++- .../maxwell/translate/impl/load_store_attribute.cpp | 16 ++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aaf2a74a7..ab67446c8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool index_reg; BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<32, 1, u64> o; BitField<31, 1, u64> patch; BitField<47, 2, Size> size; @@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ald.vertex_reg)}; const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); } return; } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) { BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; BitField<31, 1, u64> patch; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<47, 2, Size> size; } const ast{insn}; @@ -114,15 +116,17 @@ void 
TranslatorVisitor::AST(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ast.vertex_reg)}; const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); } return; } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); } -- cgit v1.2.3 From 09165ae18989c17661faf188e6825a9eb4e03a27 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 06:11:18 -0300 Subject: shader: Document and relax cache control on surface instructions --- .../maxwell/translate/impl/surface_load_store.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 9a2d16a6e..e1b8aa8ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -61,18 +61,19 @@ enum class Clamp : u64 { TRAP, }; +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators enum class LoadCache : u64 { - Default, - CG, - CI, - CV, + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (L2 and below, not L1) + CI, // ??? 
+ CV, // Don't cache and fetch again (volatile) }; enum class StoreCache : u64 { - Default, - CG, - CS, - WT, + WB, // Cache write-back all coherent levels + CG, // Cache at global level (L2 and below, not L1) + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory, volatile?) }; ImageFormat Format(Size size) { @@ -188,7 +189,7 @@ void TranslatorVisitor::SULD(u64 insn) { if (suld.clamp != Clamp::IGN) { throw NotImplementedException("Clamp {}", suld.clamp.Value()); } - if (suld.cache != LoadCache::Default) { + if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { throw NotImplementedException("Cache {}", suld.cache.Value()); } const bool is_typed{suld.d != 0}; @@ -248,7 +249,7 @@ void TranslatorVisitor::SUST(u64 insn) { if (sust.clamp != Clamp::IGN) { throw NotImplementedException("Clamp {}", sust.clamp.Value()); } - if (sust.cache != StoreCache::Default) { + if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { throw NotImplementedException("Cache {}", sust.cache.Value()); } const bool is_typed{sust.d != 0}; -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- src/shader_recompiler/frontend/maxwell/program.cpp | 5 ++++ .../translate/impl/load_store_attribute.cpp | 33 +++++++++++++--------- .../translate/impl/move_special_register.cpp | 2 ++ 3 files changed, 27 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ab67446c8..20a1d61cc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -70,6 +70,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool size; } const ald{insn}; - if (ald.o != 0) 
{ - throw NotImplementedException("O"); - } - if (ald.patch != 0) { - throw NotImplementedException("P"); - } const u64 offset{ald.absolute_offset.Value()}; if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); @@ -84,11 +78,19 @@ void TranslatorVisitor::ALD(u64 insn) { const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + } } return; } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); @@ -106,9 +108,6 @@ void TranslatorVisitor::AST(u64 insn) { BitField<47, 2, Size> size; } const ast{insn}; - if (ast.patch != 0) { - throw NotImplementedException("P"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } @@ -120,11 +119,19 @@ void TranslatorVisitor::AST(u64 insn) { const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + element)); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + } } return; } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } 
HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index bc822d585..660b84c20 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: -- cgit v1.2.3 From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: shader: Implement PIXLD.MY_INDEX --- .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/pixel_load.cpp | 46 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a45d1e4be..a4f99bbbe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,10 +181,6 @@ void TranslatorVisitor::PEXIT(u64) { ThrowNotImplemented(Opcode::PEXIT); } -void TranslatorVisitor::PIXLD(u64) { - ThrowNotImplemented(Opcode::PIXLD); -} - void 
TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 80940b17069f6baa733a9b572445b27bc7509137 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 18:47:26 -0300 Subject: shader: Implement SampleMask --- src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index 58a53c0ec..c2443c886 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -22,7 +22,7 @@ void ExitFragment(TranslatorVisitor& v) { } } if (sph.ps.omap.sample_mask != 0) { - throw NotImplementedException("Sample mask"); + v.ir.SetSampleMask(v.X(src_reg)); } if (sph.ps.omap.depth != 0) { v.ir.SetFragDepth(v.F(src_reg + 1)); -- cgit v1.2.3 From be431f5ed080955cce358e9750347229b2bc9a04 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Apr 2021 00:48:35 -0400 Subject: shader: Implement BFE and BFI CC Fix two bugs in BFI. --- .../maxwell/translate/impl/bitfield_extract.cpp | 11 +++++++---- .../frontend/maxwell/translate/impl/bitfield_insert.cpp | 17 +++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp index 0738ae7a6..9d5a87e52 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -18,10 +18,6 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { BitField<48, 1, u64> is_signed; } const bfe{insn}; - if (bfe.cc != 0) { - throw NotImplementedException("BFE CC"); - } - const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; @@ -53,6 +49,13 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { result = IR::U32{v.ir.Select(zero_count, zero, result)}; v.X(bfe.dest_reg, result); + + if (bfe.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + 
v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp index fb7f821e6..1e1ec2119 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -16,18 +16,14 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba BitField<47, 1, u64> cc; } const bfi{insn}; - if (bfi.cc != 0) { - throw NotImplementedException("BFI CC"); - } - - const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; const IR::U32 max_size{v.ir.Imm32(32)}; // Edge case conditions - const IR::U1 zero_offset{v.ir.IEqual(offset, v.ir.Imm32(0))}; const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; - const IR::U1 exceed_count{v.ir.IGreaterThanEqual(unsafe_count, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; @@ -36,9 +32,14 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; result = IR::U32{v.ir.Select(exceed_offset, base, result)}; - result = IR::U32{v.ir.Select(zero_offset, base, result)}; v.X(bfi.dest_reg, result); + if (bfi.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace -- 
cgit v1.2.3 From 0a0818c0259b4f90f1f7bb37fcffbc1f194ca4d0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 03:21:03 -0300 Subject: shader: Fix memory barriers --- .../maxwell/translate/impl/barrier_operations.cpp | 30 ++++++++-------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 2a2a294df..86e433e41 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -12,34 +12,24 @@ namespace Shader::Maxwell { namespace { // Seems to be in CUDA terminology. enum class LocalScope : u64 { - CTG = 0, - GL = 1, - SYS = 2, - VC = 3, + CTA, + GL, + SYS, + VC, }; - -IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { - switch (scope) { - case LocalScope::CTG: - return IR::MemoryScope::Workgroup; - case LocalScope::GL: - return IR::MemoryScope::Device; - case LocalScope::SYS: - return IR::MemoryScope::System; - default: - throw NotImplementedException("Unimplemented Local Scope {}", scope); - } -} - } // Anonymous namespace void TranslatorVisitor::MEMBAR(u64 inst) { union { u64 raw; BitField<8, 2, LocalScope> scope; - } membar{inst}; + } const membar{inst}; - ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); + if (membar.scope == LocalScope::CTA) { + ir.WorkgroupMemoryBarrier(); + } else { + ir.DeviceMemoryBarrier(); + } } void TranslatorVisitor::DEPBAR() { -- cgit v1.2.3 From f18a6dd1bdaffda4c3e771af3cf7cf41919ebd67 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 16 Apr 2021 23:52:58 +0200 Subject: shader: Implement SR_Y_DIRECTION --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff 
--git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 660b84c20..b0baff74b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -150,6 +150,8 @@ enum class SpecialRegister : u64 { return ir.SubgroupGtMask(); case SpecialRegister::SR_GEMASK: return ir.SubgroupGeMask(); + case SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast(ir.YDirection()); default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From 21a878237bcc6f19f41a4bce156714fd76be9d58 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 03:41:37 +0200 Subject: shader: Implement IADD3.CC/.X --- .../translate/impl/integer_add_three_input.cpp | 29 ++++++++++++++++------ 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index c2dbd7998..e88c0ffb6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -58,13 +58,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { BitField<51, 1, u64> neg_a; } iadd3{insn}; - if (iadd3.x != 0) { - throw NotImplementedException("IADD3 X"); - } - if (iadd3.cc != 0) { - throw NotImplementedException("IADD3 CC"); - } - IR::U32 op_a{v.X(iadd3.src_a)}; op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); @@ -81,10 +74,32 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { } IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; + IR::U1 of_1; + if 
(iadd3.cc != 0) { + of_1 = v.ir.GetOverflowFromOp(lhs); + } + if (iadd3.x != 0) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + lhs = v.ir.IAdd(lhs, carry); + } + if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { + IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; + of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); + } lhs = IntegerShift(v.ir, lhs, iadd3.shift); const IR::U32 result{v.ir.IAdd(lhs, op_c)}; v.X(iadd3.dest_reg, result); + if (iadd3.cc != 0) { + // TODO: How does CC behave when X is set? + if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); + } } } // Anonymous namespace -- cgit v1.2.3 From 881b33da3ba16fc105c6ccd20f6fbc9c4552ead9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 03:42:40 +0200 Subject: shader: Implement F2F (Imm) --- .../floating_point_conversion_floating_point.cpp | 30 ++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index ce2cf470d..61484df57 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -179,7 +179,33 @@ void TranslatorVisitor::F2F_cbuf(u64 insn) { } void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { - throw NotImplementedException("Instruction"); -} + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> 
selector; + BitField<20, 20, u64> imm; + + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + const u32 imm{static_cast(f2f.imm & 0x00ffff)}; + IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, 0)}; + break; + } + case FloatFormat::F32: + src_a = GetFloatImm20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleImm20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} // namespace Shader::Maxwell } // namespace Shader::Maxwell -- cgit v1.2.3 From 29990289767c41c162473c9775ad3ba08e7ee9ea Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 10:08:22 +0200 Subject: shader: Address feedback --- .../impl/floating_point_conversion_floating_point.cpp | 15 +++++++++------ .../maxwell/translate/impl/integer_add_three_input.cpp | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 61484df57..02ab023c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -184,16 +184,19 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { BitField<49, 1, u64> abs; BitField<10, 2, FloatFormat> src_size; BitField<41, 1, u64> selector; - BitField<20, 20, u64> imm; - + BitField<20, 19, u64> imm; + BitField<56, 1, u64> imm_neg; } const f2f{insn}; IR::F16F32F64 src_a; switch (f2f.src_size) { case FloatFormat::F16: { - const u32 imm{static_cast(f2f.imm & 0x00ffff)}; - IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm 
<< 16)))}; - src_a = IR::F16{ir.CompositeExtract(vector, 0)}; + const u32 imm{static_cast(f2f.imm & 0x0000ffff)}; + const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; + if (f2f.imm_neg != 0) { + throw NotImplementedException("Neg bit on F16"); + } break; } case FloatFormat::F32: @@ -206,6 +209,6 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); } F2F(*this, insn, src_a, f2f.abs != 0); -} // namespace Shader::Maxwell +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index e88c0ffb6..15da90365 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -83,7 +83,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { lhs = v.ir.IAdd(lhs, carry); } if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { - IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; + const IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); } lhs = IntegerShift(v.ir, lhs, iadd3.shift); -- cgit v1.2.3 From 7018e524f5e6217b3259333acc4ea09ad036d331 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 16:33:23 -0300 Subject: shader: Add NVN storage buffer fallbacks When we can't track the SSBO origin of a global memory instruction, leave it as a global memory operation and assume these pointers are in the NVN storage buffer slots, then apply a linear search in the shader's runtime. 
--- src/shader_recompiler/frontend/maxwell/program.cpp | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 20a1d61cc..14180dcd9 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { }(); } } + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + continue; + } + // Assume these are written for now + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = true, + }); + } +} } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, @@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Mon, 19 Apr 2021 16:36:17 -0300 Subject: shader: Simplify code for local memory --- .../maxwell/translate/impl/load_store_local_shared.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 
deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index e24b49721..20df163f2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -34,6 +34,15 @@ IR::U32 Offset(TranslatorVisitor& v, u64 insn) { } } +std::pair WordOffset(TranslatorVisitor& v, u64 insn) { + const IR::U32 offset{Offset(v, insn)}; + if (offset.IsImmediate()) { + return {v.ir.Imm32(offset.U32() / 4), offset}; + } else { + return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; + } +} + std::pair GetSize(u64 insn) { union { u64 raw; @@ -79,9 +88,7 @@ IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { } // Anonymous namespace void TranslatorVisitor::LDL(u64 insn) { - const IR::U32 offset{Offset(*this, insn)}; - const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; - + const auto [word_offset, offset]{WordOffset(*this, insn)}; const IR::Reg dest{Reg(insn)}; const auto [bit_size, is_signed]{GetSize(insn)}; switch (bit_size) { @@ -133,9 +140,7 @@ void TranslatorVisitor::LDS(u64 insn) { } void TranslatorVisitor::STL(u64 insn) { - const IR::U32 offset{Offset(*this, insn)}; - const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; - + const auto [word_offset, offset]{WordOffset(*this, insn)}; const IR::Reg reg{Reg(insn)}; const IR::U32 src{X(reg)}; const int bit_size{GetSize(insn).first}; -- cgit v1.2.3 From e4d1122082e74410baac6677c850fea1a0be5c52 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:08 -0300 Subject: shader: Move siblings check to a separate function and comment them out --- .../frontend/maxwell/structured_control_flow.cpp | 37 ++++++++++++---------- 1 file changed, 21 insertions(+), 16 
deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index e63e25aa6..6021ac891 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -304,6 +304,23 @@ bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { return false; } +bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { + Node it{goto_stmt}; + do { + if (it == label_stmt) { + return true; + } + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + if (it == label_stmt) { + return true; + } + ++it; + } + return false; +} + class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, @@ -353,22 +370,10 @@ private: } } } - // TODO: Remove this - { - Node it{goto_stmt}; - bool sibling{false}; - do { - sibling |= it == label_stmt; - --it; - } while (it != goto_stmt->up->children.begin()); - while (it != goto_stmt->up->children.end()) { - sibling |= it == label_stmt; - ++it; - } - if (!sibling) { - throw LogicError("Not siblings"); - } - } + // Expensive operation: + // if (!AreSiblings(goto_stmt, label_stmt)) { + // throw LogicError("Goto is not a sibling with the label"); + // } // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it -- cgit v1.2.3 From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: shader: Move microinstruction header to the value header --- src/shader_recompiler/frontend/maxwell/structured_control_flow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git 
a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index e4797291e..a6be12ba2 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,7 +11,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -- cgit v1.2.3 From cc0fcd1b8d3080ae83709874e1d66f9e4cf3f1be Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 03:39:35 -0300 Subject: shader: Improve goto removal algorithm complexity Find sibling node containing a nephew searching from the nephew itself instead of the uncle. --- .../frontend/maxwell/structured_control_flow.cpp | 77 ++++++++-------------- 1 file changed, 28 insertions(+), 49 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 6021ac891..b85b613f3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -222,27 +222,6 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { return ret; } -bool HasNode(const Tree& tree, ConstNode stmt) { - const auto end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { - return true; - } - } - return false; -} - -Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { - const ConstNode label_stmt{goto_stmt->label}; - const ConstNode end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - 
if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { - return it; - } - } - throw LogicError("Lift label not in tree"); -} - void SanitizeNoBreaks(const Tree& tree) { if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { throw NotImplementedException("Capturing statement with break nodes"); @@ -288,22 +267,6 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); } -bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { - ++offset; - - const auto end = tree.end(); - for (ConstNode it = tree.begin(); it != end; ++it) { - ++offset; - if (stmt == it) { - return true; - } - if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { - return true; - } - } - return false; -} - bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { Node it{goto_stmt}; do { @@ -321,6 +284,30 @@ bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { return false; } +Node SiblingFromNephew(Node uncle, Node nephew) noexcept { + Statement* const parent{uncle->up}; + Statement* it{&*nephew}; + while (it->up != parent) { + it = it->up; + } + return Tree::s_iterator_to(*it); +} + +bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { + const Node end{right_sibling->up->children.end()}; + for (auto it = right_sibling; it != end; ++it) { + if (it == left_sibling) { + return false; + } + } + return true; +} + +bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { + const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; + return AreOrdered(sibling, goto_stmt); +} + class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, @@ -358,7 +345,7 @@ private: --goto_level; } } else { // Level(goto_stmt) < Level(label_stmt) - if (Offset(goto_stmt) > Offset(label_stmt)) { + if (NeedsLift(goto_stmt, label_stmt)) { // Lift goto_stmt to above stmt containing label_stmt using 
goto-lifting // transformations goto_stmt = Lift(goto_stmt); @@ -378,7 +365,7 @@ private: if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it goto_stmt->up->children.erase(goto_stmt); - } else if (Offset(goto_stmt) < Offset(label_stmt)) { + } else if (AreOrdered(goto_stmt, label_stmt)) { // Eliminate goto_stmt with a conditional EliminateAsConditional(goto_stmt, label_stmt); } else { @@ -523,8 +510,8 @@ private: [[nodiscard]] Node MoveInward(Node goto_stmt) { Statement* const parent{goto_stmt->up}; Tree& body{parent->children}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; const Node label{goto_stmt->label}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; const u32 label_id{label->id}; Statement* const goto_cond{goto_stmt->cond}; @@ -562,7 +549,7 @@ private: Tree& body{parent->children}; const Node label{goto_stmt->label}; const u32 label_id{label->id}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -627,14 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - size_t Offset(ConstNode stmt) const { - size_t offset{0}; - if (!SearchNode(root_stmt.children, stmt, offset)) { - throw LogicError("Node not found in tree"); - } - return offset; - } - ObjectPool& inst_pool; ObjectPool& block_pool; ObjectPool& pool; -- cgit v1.2.3 From c8f9772d6590a018665d47a165951864ff783017 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 06:10:04 -0300 Subject: shader: Fix gcc warnings --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp 
b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index b85b613f3..cc5410c6d 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -267,7 +267,7 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); } -bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { +[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { Node it{goto_stmt}; do { if (it == label_stmt) { -- cgit v1.2.3 From 427951d6fe8a0914434b0fcf897eef67749cba9d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Apr 2021 20:59:51 -0400 Subject: shader: add missing include guard in half_floating_point_helper.h --- .../frontend/maxwell/translate/impl/half_floating_point_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index 24063b2ab..59da56a7e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#pragma once + #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" -- cgit v1.2.3 From 7a9dc7839876fe5b24d1c841f182e01108ba676e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Apr 2021 21:34:51 -0400 Subject: shader: Refactor atomic_operations_global_memory --- .../impl/atomic_operations_global_memory.cpp | 80 ++++++++++------------ 1 file changed, 36 insertions(+), 44 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 7a32c5eb3..66f39e44e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -154,69 +154,61 @@ void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result break; } } + +IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, + AtomSize size, AtomOp op) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); + case AtomSize::U64: + case AtomSize::S64: + return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); + case AtomSize::F32: + return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); + case AtomSize::F16x2: { + return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); + } + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, + const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { + IR::Value result; + if 
(AtomOpNotApplicable(size, op)) { + result = LoadGlobal(v.ir, offset, size); + } else { + result = ApplyAtomOp(v, operand_reg, offset, size, op); + } + if (write_dest) { + StoreResult(v, dest_reg, result, size); + } +} } // Anonymous namespace void TranslatorVisitor::ATOM(u64 insn) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> addr_reg; - BitField<20, 8, IR::Reg> src_reg_b; + BitField<20, 8, IR::Reg> operand_reg; BitField<49, 3, AtomSize> size; BitField<52, 4, AtomOp> op; } const atom{insn}; - - const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; - const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; - const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; const IR::U64 offset{AtomOffset(*this, insn)}; - IR::Value result; - - if (AtomOpNotApplicable(atom.size, atom.op)) { - result = LoadGlobal(ir, offset, atom.size); - } else if (!is_integer) { - if (atom.size == AtomSize::F32) { - result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); - } else { - const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; - result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); - } - } else if (size_64) { - result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); - } else { - result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); - } - StoreResult(*this, atom.dest_reg, result, atom.size); + GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); } void TranslatorVisitor::RED(u64 insn) { union { u64 raw; - BitField<0, 8, IR::Reg> src_reg_b; - BitField<8, 8, IR::Reg> addr_reg; + BitField<0, 8, IR::Reg> operand_reg; BitField<20, 3, AtomSize> size; BitField<23, 3, AtomOp> op; } const red{insn}; - - if (AtomOpNotApplicable(red.size, red.op)) { - return; - } - const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; - const bool is_signed{red.size 
== AtomSize::S32 || red.size == AtomSize::S64}; - const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; const IR::U64 offset{AtomOffset(*this, insn)}; - if (!is_integer) { - if (red.size == AtomSize::F32) { - ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); - } else { - const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; - ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); - } - } else if (size_64) { - ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); - } else { - ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); - } + GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); } } // namespace Shader::Maxwell -- cgit v1.2.3 From fe25f42403493dc4b5e801f78d7f4ec5372aa538 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:35:15 -0300 Subject: shader: Remove identity removal pass for better build times --- src/shader_recompiler/frontend/maxwell/program.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 14180dcd9..aee96eae3 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -143,7 +143,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Fri, 23 Apr 2021 07:09:02 -0300 Subject: shader: Fix VMNMX selector B --- .../frontend/maxwell/translate/impl/video_set_predicate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index ec5e74f6d..1b66abc33 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,13 +69,14 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; + const u32 b_selector{static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; const bool src_a_signed{vsetp.src_a_sign != 0}; const bool src_b_signed{vsetp.src_b_sign != 0}; const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; - const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, a_selector, src_b_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; // Compare operation's sign is only dependent on operand b's sign const bool compare_signed{src_b_signed}; -- cgit v1.2.3 From fb14820c86f082f970183c2722c5c38bcbb5a2ab Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 27 Apr 2021 21:05:41 -0400 Subject: shader: Fix IADD3.CC --- .../maxwell/translate/impl/integer_add_three_input.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 15da90365..259a6e6ac 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -73,21 +73,13 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { op_c = v.ir.INeg(op_c); } - IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; - IR::U1 of_1; - if (iadd3.cc != 0) { - of_1 = v.ir.GetOverflowFromOp(lhs); - } + IR::U32 
lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; - lhs = v.ir.IAdd(lhs, carry); - } - if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { - const IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; - of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); + lhs_1 = v.ir.IAdd(lhs_1, carry); } - lhs = IntegerShift(v.ir, lhs, iadd3.shift); - const IR::U32 result{v.ir.IAdd(lhs, op_c)}; + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, iadd3.shift)}; + const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; v.X(iadd3.dest_reg, result); if (iadd3.cc != 0) { @@ -98,6 +90,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); v.SetCFlag(v.ir.GetCarryFromOp(result)); + const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); } } -- cgit v1.2.3 From da936d6ad8cef5418b7644754ee4bcbf7f6125f8 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 19:10:55 +0200 Subject: shader: Implement delegation of Exit to dispatcher on CFG --- .../frontend/maxwell/control_flow.cpp | 41 ++++++++++++++++++++-- .../frontend/maxwell/control_flow.h | 9 ++++- 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 9811183f1..298faa03e 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -185,8 +185,20 @@ Function::Function(ObjectPool& block_pool, Location start_address) label.block->branch_false = nullptr; } -CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_}, program_start{start_address} { 
+CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address, + bool exits_to_dispatcher_) + : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ + exits_to_dispatcher_} { + if (exits_to_dispatcher) { + dispatch_block = block_pool.Create(Block{}); + dispatch_block->begin = {}; + dispatch_block->end = {}; + dispatch_block->end_class = EndClass::Exit; + dispatch_block->cond = IR::Condition(true); + dispatch_block->stack = {}; + dispatch_block->branch_true = nullptr; + dispatch_block->branch_false = nullptr; + } functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -196,6 +208,12 @@ CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_addre AnalyzeLabel(function_id, label); } } + if (exits_to_dispatcher) { + const auto it = functions[0].blocks.rbegin(); + dispatch_block->begin = it->end + 1; + dispatch_block->end = it->end + 1; + functions[0].blocks.insert(*dispatch_block); + } } void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { @@ -462,11 +480,22 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati // EXIT will never be taken return AnalysisState::Continue; } + if (exits_to_dispatcher && function_id != 0) { + throw NotImplementedException("Dispatch EXIT on external function."); + } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; + if (exits_to_dispatcher) { + block->end = pc; + block->branch_true = dispatch_block; + block->end_class = EndClass::Branch; + block->cond = cond; + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + return AnalysisState::Branch; + } AnalyzeCondInst(block, function_id, 
pc, EndClass::Exit, cond); return AnalysisState::Branch; } @@ -477,6 +506,14 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } + if (exits_to_dispatcher) { + block->cond = IR::Condition{true}; + block->end = pc; + block->end_class = EndClass::Branch; + block->branch_true = dispatch_block; + block->branch_false = nullptr; + return AnalysisState::Branch; + } block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 89966b16a..0e515c3b6 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -111,7 +111,8 @@ class CFG { }; public: - explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address); + explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address, + bool exits_to_dispatcher = false); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -128,6 +129,10 @@ public: return std::span(functions.data(), functions.size()); } + [[nodiscard]] bool ExitsToDispatcher() const { + return exits_to_dispatcher; + } + private: void AnalyzeLabel(FunctionId function_id, Label& label); @@ -158,6 +163,8 @@ private: boost::container::small_vector functions; FunctionId current_function_id{0}; Location program_start; + bool exits_to_dispatcher{}; + Block* dispatch_block{}; }; } // namespace Shader::Maxwell::Flow -- cgit v1.2.3 From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: shader: Implement VertexA stage --- src/shader_recompiler/frontend/maxwell/program.cpp | 28 ++++++++++++++++++++++ src/shader_recompiler/frontend/maxwell/program.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') 
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aee96eae3..59897cb3e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg); +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); } // namespace Shader::Maxwell -- cgit v1.2.3 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- .../frontend/maxwell/control_flow.cpp | 10 +++--- src/shader_recompiler/frontend/maxwell/program.cpp | 37 +++++++++++----------- src/shader_recompiler/frontend/maxwell/program.h | 1 + 3 files changed, 24 insertions(+), 24 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 298faa03e..e7abea82f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -209,9 +209,9 @@ CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_addre } } if (exits_to_dispatcher) { - const auto it = functions[0].blocks.rbegin(); - dispatch_block->begin = it->end + 1; - dispatch_block->end = it->end + 1; + const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; functions[0].blocks.insert(*dispatch_block); } } @@ -481,7 +481,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } if (exits_to_dispatcher && function_id != 0) { - throw NotImplementedException("Dispatch EXIT on external 
function."); + throw NotImplementedException("Dispatch EXIT on external function"); } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { @@ -490,9 +490,9 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; if (exits_to_dispatcher) { block->end = pc; - block->branch_true = dispatch_block; block->end_class = EndClass::Branch; block->cond = cond; + block->branch_true = dispatch_block; block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); return AnalysisState::Branch; } diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 59897cb3e..a4fa4319d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -151,31 +151,30 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + 
IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: -- cgit v1.2.3 From 7ecc6de56ae01602b25408db8b6658d7a41a419a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 23 Apr 2021 17:47:54 -0400 Subject: shader: Implement Int32 SUATOM/SURED --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 3 +- .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/surface_atomic_operations.cpp | 204 +++++++++++++++++++++ 4 files changed, 207 insertions(+), 5 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c759bd4d4..2fee591bb 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -244,7 +244,8 @@ INST(STG, "STG", "1110 1110 1101 1---") INST(STL, "STL", "1110 1111 0101 0---") INST(STP, "STP", "1110 1110 1010 0---") INST(STS, "STS", "1110 1111 0101 1---") -INST(SUATOM_cas, "SUATOM", "1110 1010 ---- ----") +INST(SUATOM, "SUATOM", "1110 1010 0--- ----") +INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bf7d1bae8..335e4f24f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -303,6 +303,7 @@ public: void STL(u64 insn); void 
STP(u64 insn); void STS(u64 insn); + void SUATOM(u64 insn); void SUATOM_cas(u64 insn); void SULD(u64 insn); void SURED(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4f99bbbe..7e26ab359 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -249,10 +249,6 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SURED(u64) { - ThrowNotImplemented(Opcode::SURED); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..994bdc3eb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -0,0 +1,204 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +enum class Size : u64 { + U32, + S32, + U64, + S64, + F32FTZRN, + F16x2FTZRN, + SD32, + SD64, +}; + +enum class AtomicOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + default: + break; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, + const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, + bool is_signed) { + switch (op) { + case AtomicOp::ADD: + return ir.ImageAtomicIAdd(handle, coords, op_b, info); + case AtomicOp::MIN: + return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); + case AtomicOp::MAX: + return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); + case AtomicOp::INC: + return ir.ImageAtomicInc(handle, coords, op_b, info); + case AtomicOp::DEC: + return ir.ImageAtomicDec(handle, coords, op_b, 
info); + case AtomicOp::AND: + return ir.ImageAtomicAnd(handle, coords, op_b, info); + case AtomicOp::OR: + return ir.ImageAtomicOr(handle, coords, op_b, info); + case AtomicOp::XOR: + return ir.ImageAtomicXor(handle, coords, op_b, info); + case AtomicOp::EXCH: + return ir.ImageAtomicExchange(handle, coords, op_b, info); + default: + throw NotImplementedException("Atomic Operation {}", op); + } +} + +ImageFormat Format(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return ImageFormat::R32_UINT; + default: + break; + } + throw NotImplementedException("Invalid size {}", size); +} + +bool IsSizeInt32(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return true; + default: + return false; + } +} + +void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, + IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, + u64 bound_offset, bool is_bindless, bool write_result) { + if (clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", clamp); + } + if (!IsSizeInt32(size)) { + throw NotImplementedException("Size {}", size); + } + const bool is_signed{size == Size::S32}; + const ImageFormat format{Format(size)}; + const TextureType tex_type{GetType(type)}; + const IR::Value coords{MakeCoords(v, coord_reg, type)}; + + const IR::U32 handle{is_bindless != 0 ? 
v.X(bindless_reg) + : v.ir.Imm32(static_cast(bound_offset * 4))}; + IR::TextureInstInfo info{}; + info.type.Assign(tex_type); + info.image_format.Assign(format); + + // TODO: float/64-bit operand + const IR::Value op_b{v.X(operand_reg)}; + const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; + + if (write_result) { + v.X(dest_reg, IR::U32{color}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::SUATOM(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> is_bindless; + BitField<29, 4, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<51, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<36, 13, u64> bound_offset; // !is_bindless + BitField<39, 8, IR::Reg> bindless_reg; // is_bindless + } const suatom{insn}; + + ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, + suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, + suatom.is_bindless != 0, true); +} + +void TranslatorVisitor::SURED(u64 insn) { + // TODO: confirm offsets + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<21, 3, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<20, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> operand_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sured{insn}; + ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, + sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, + sured.is_bound == 0, false); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 153a77efee629ccdc342e3f3f2fd49488e884233 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 17:16:54 +0200 Subject: shader: Stub SR_AFFINITY --- 
.../frontend/maxwell/translate/impl/move_special_register.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 01fb6f5e5..fe3cdfa96 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -159,6 +159,9 @@ enum class SpecialRegister : u64 { return ir.SubgroupGeMask(); case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast(ir.YDirection()); + case SpecialRegister::SR_AFFINITY: + // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + return ir.Imm32(0); // This is the default value hardware returns. default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From ee61ec2c39e6db53c56e7ac761a2223d99f06908 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 2 May 2021 01:50:27 +0200 Subject: shader: Optimize NVN Fallthrough --- src/shader_recompiler/frontend/maxwell/program.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a4fa4319d..0d3f00699 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) { }()}; auto& descs{program.info.storage_buffers_descriptors}; for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } const u32 offset{base + index * descriptor_size}; const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; if (it != descs.end()) { + it->is_written |= 
program.info.stores_global_memory; continue; } - // Assume these are written for now descs.push_back({ .cbuf_index = driver_cbuf, .cbuf_offset = offset, .count = 1, - .is_written = true, + .is_written = program.info.stores_global_memory, }); } } -- cgit v1.2.3 From 09dc23f97188a4fa5ea03556a9187bfbefef1d78 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 5 May 2021 00:37:05 -0400 Subject: shader: ISET.X implementation --- .../translate/impl/integer_compare_and_set.cpp | 66 +++++++++++++++++++--- 1 file changed, 58 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index a2cd8d7c6..34fa7345c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -9,7 +9,56 @@ namespace Shader::Maxwell { namespace { -void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? 
ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? 
ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; @@ -24,27 +73,28 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<49, 3, CompareOp> compare_op; } const iset{insn}; - if (iset.x != 0) { - throw NotImplementedException("ISET.X"); - } - - const IR::U32 src_reg{v.X(iset.src_reg)}; + const IR::U32 src_a{v.X(iset.src_reg)}; const bool is_signed{iset.is_signed != 0}; + const IR::U32 zero{v.ir.Imm32(0)}; + const bool x{iset.x != 0}; + const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; + IR::U1 pred{v.ir.GetPred(iset.pred)}; if (iset.neg_pred != 0) { pred = v.ir.LogicalNot(pred); } - const IR::U1 cmp_result{IntegerCompare(v.ir, src_reg, src_a, iset.compare_op, is_signed)}; const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{iset.bf == 0 ? 
one_mask : fp_one}; const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; v.X(iset.dest_reg, result); if (iset.cc != 0) { + if (x) { + throw NotImplementedException("ISET.CC + X"); + } const IR::U1 is_zero{v.ir.IEqual(result, zero)}; v.SetZFlag(is_zero); if (iset.bf != 0) { -- cgit v1.2.3 From 3b6a632237e2f8388f2591d54fb31bebdc2a0ade Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 03:55:09 -0300 Subject: shader: Add floating-point rounding to I2F --- .../maxwell/translate/impl/integer_floating_point_conversion.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index 3c233597f..d6224d5cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -99,7 +99,12 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 
64 : 32}; const int dst_bitsize{BitSize(i2f.float_format)}; - IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(i2f.fp_rounding), + .fmz_mode = IR::FmzMode::DontCare, + }; + auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; if (i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive -- cgit v1.2.3 From d54d7de40e7295827b0e4e4026441b53d3fc9569 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:40:54 -0300 Subject: glasm: Rework control flow introducing a syntax list This commit regresses VertexA shaders, their transformation pass has to be adapted to the new control flow. --- src/shader_recompiler/frontend/maxwell/program.cpp | 24 ++- .../frontend/maxwell/structured_control_flow.cpp | 235 ++++++++++++--------- .../frontend/maxwell/structured_control_flow.h | 12 +- .../frontend/maxwell/translate/translate.cpp | 7 +- .../frontend/maxwell/translate/translate.h | 2 +- 5 files changed, 161 insertions(+), 119 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0d3f00699..017c4b8fd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "shader_recompiler/frontend/ir/basic_block.h" @@ -15,6 +16,16 @@ namespace Shader::Maxwell { namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const 
IR::AbstractSyntaxNode& node) { return node.block; }); + return blocks; +} + void RemoveUnreachableBlocks(IR::Program& program) { // Some blocks might be unreachable if a function call exists unconditionally // If this happens the number of blocks and post order blocks will mismatch @@ -23,7 +34,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { } const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); } @@ -110,8 +121,9 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - program.blocks = VisitAST(inst_pool, block_pool, env, cfg); - program.post_order_blocks = PostOrder(program.blocks); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); program.local_memory_size = env.LocalMemorySize(); switch (program.stage) { @@ -159,9 +171,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); std::swap(result.blocks, vertex_a.blocks); - for (IR::Block* block : vertex_b.blocks) { - result.blocks.push_back(block); - } + result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); @@ -173,7 +183,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b 
Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.blocks); + result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cc5410c6d..e7e2e9c82 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -36,7 +36,6 @@ using Tree = boost::intrusive::list>; using Node = Tree::iterator; -using ConstNode = Tree::const_iterator; enum class StatementType { Code, @@ -91,7 +90,8 @@ struct IndirectBranchCond {}; #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement #endif struct Statement : ListBaseHook { - Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(const Flow::Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} Statement(Goto, Statement* cond_, Node label_, Statement* up_) : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} @@ -125,7 +125,7 @@ struct Statement : ListBaseHook { } union { - IR::Block* code; + const Flow::Block* block; Node label; Tree children; IR::Condition guest_cond; @@ -171,8 +171,8 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->code->LocationBegin(), stmt->code->LocationEnd(), - reinterpret_cast(stmt->code)); + 
stmt->block->begin, stmt->block->end, + reinterpret_cast(stmt->block)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), @@ -407,11 +407,7 @@ private: }}; root.push_front(make_reset_variable()); root.insert(ip, make_reset_variable()); - - const u32 begin_offset{block.begin.Offset()}; - const u32 end_offset{block.end.Offset()}; - IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; - root.insert(ip, *pool.Create(ir_block, &root_stmt)); + root.insert(ip, *pool.Create(&block, &root_stmt)); switch (block.end_class) { case Flow::EndClass::Branch: { @@ -620,13 +616,13 @@ private: Statement root_stmt{FunctionTag{}}; }; -IR::Block* TryFindForwardBlock(const Statement& stmt) { - const Tree& tree{stmt.up->children}; - const ConstNode end{tree.cend()}; - ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; while (forward_node != end && !HasChildren(forward_node->type)) { if (forward_node->type == StatementType::Code) { - return forward_node->code; + return &*forward_node; } ++forward_node; } @@ -654,21 +650,29 @@ class TranslatePass { public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::BlockList& block_list_) + IR::AbstractSyntaxList& syntax_list_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, - block_list{block_list_} { + syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*block_list.front()}; + IR::Block& first_block{*syntax_list.front().block}; IR::IREmitter ir{first_block, first_block.begin()}; ir.Prologue(); } private: - void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + void Visit(Statement& 
parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.block = current_block; + }}; Tree& tree{parent.children}; - IR::Block* current_block{nullptr}; - for (auto it = tree.begin(); it != tree.end(); ++it) { Statement& stmt{*it}; switch (stmt.type) { @@ -676,124 +680,157 @@ private: // Labels can be ignored break; case StatementType::Code: { - if (current_block && current_block != stmt.code) { - IR::IREmitter{*current_block}.Branch(stmt.code); - } - current_block = stmt.code; - Translate(env, stmt.code); - block_list.push_back(stmt.code); + ensure_block(); + Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); break; } case StatementType::SetVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } case StatementType::SetIndirectBranchVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; ir.SetIndirectBranchVariable(address); break; } case StatementType::If: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const merge_block{MergeBlock(parent, stmt)}; - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, merge_block, break_block); - // Implement if header block - IR::Block* const first_if_block{block_list.at(first_block_index)}; IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.SelectionMerge(merge_block); - 
ir.BranchConditional(cond, first_if_block, merge_block); + ir.BranchConditionRef(cond); + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.if_node.cond = cond; + if_node.if_node.body = then_block; + if_node.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; if (current_block) { - IR::IREmitter{*current_block}.Branch(loop_header_block); + current_block->AddBranch(loop_header_block); } - block_list.push_back(loop_header_block); + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.block = loop_header_block; - IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, new_continue_block, merge_block); + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); // The continue block is located at the end of the loop - block_list.push_back(new_continue_block); + IR::IREmitter ir{*continue_block}; + 
const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); - // Implement loop header block - IR::Block* const first_loop_block{block_list.at(first_block_index)}; - IR::IREmitter ir{*loop_header_block}; - ir.LoopMerge(merge_block, new_continue_block); - ir.Branch(first_loop_block); + IR::Block* const body_block{syntax_list.at(body_block_index).block}; + loop_header_block->AddBranch(body_block); - // Implement continue block - IR::IREmitter continue_ir{*new_continue_block}; - const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; - continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.loop.body = body_block; + loop.loop.continue_block = continue_block; + loop.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.repeat.cond = cond; + repeat.repeat.loop_header = loop_header_block; + repeat.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Break: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); - + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); current_block = skip_block; + + 
auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.break_node.cond = cond; + break_node.break_node.merge = break_block; + break_node.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = skip_block; break; } case StatementType::Return: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter ir{*current_block}; - ir.Epilogue(); - ir.Return(); + ensure_block(); + IR::IREmitter{*current_block}.Epilogue(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; } case StatementType::Kill: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* demote_block{MergeBlock(parent, stmt)}; - IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = demote_block; break; } case StatementType::Unreachable: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter{*current_block}.Unreachable(); + ensure_block(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; break; } default: @@ -801,42 +838,42 @@ private: } } if (current_block) { - IR::IREmitter ir{*current_block}; - if (continue_block) { - ir.Branch(continue_block); + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); } else { - ir.Unreachable(); + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; } } } IR::Block* MergeBlock(Statement& 
parent, Statement& stmt) { - if (IR::Block* const block{TryFindForwardBlock(stmt)}) { - return block; + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); } - // Create a merge block we can visit later - IR::Block* const block{block_pool.Create(inst_pool)}; - Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; - parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); - return block; + return block_pool.Create(inst_pool); } ObjectPool& stmt_pool; ObjectPool& inst_pool; ObjectPool& block_pool; Environment& env; - IR::BlockList& block_list; + IR::AbstractSyntaxList& syntax_list; + // TODO: Make this constexpr when std::vector is constexpr + const Flow::Block dummy_flow_block; }; } // Anonymous namespace -IR::BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { +IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; Statement& root{goto_pass.RootStatement()}; - IR::BlockList block_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; - return block_list; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + return syntax_list; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index a6be12ba2..88b083649 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -4,12 +4,8 @@ #pragma once -#include -#include - -#include - #include 
"shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -17,8 +13,8 @@ namespace Shader::Maxwell { -[[nodiscard]] IR::BlockList VisitAST(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index f1230f58f..0f4e7a251 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,13 +23,12 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -void Translate(Environment& env, IR::Block* block) { - if (block->IsVirtual()) { +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if (location_begin == location_end) { return; } TranslatorVisitor visitor{env, *block}; - const Location pc_end{block->LocationEnd()}; - for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { + for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index e1aa2e0f4..a3edd2e46 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -9,6 +9,6 @@ namespace Shader::Maxwell { -void Translate(Environment& env, IR::Block* block); +void Translate(Environment& env, IR::Block* 
block, u32 location_begin, u32 location_end); } // namespace Shader::Maxwell -- cgit v1.2.3 From bf5e48ffe4bd48ea681f2a01c8919c97125e88df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 04:48:46 -0300 Subject: glasm: Initial implementation of phi nodes on GLASM --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index e7e2e9c82..836d4b8aa 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -704,7 +704,7 @@ private: // Implement if header block IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); const size_t if_node_index{syntax_list.size()}; syntax_list.emplace_back(); @@ -755,7 +755,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); IR::Block* const body_block{syntax_list.at(body_block_index).block}; loop_header_block->AddBranch(body_block); @@ -792,7 +792,7 @@ private: IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); current_block->AddBranch(break_block); current_block->AddBranch(skip_block); current_block = skip_block; -- cgit v1.2.3 From f7a2340205b4fa2db32403f20d7b7afe32b15f33 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 16 May 2021 17:06:13 -0400 Subject: shader_recompiler: GCC fixes Fixes members of unnamed union not being accessible, and one function without a declaration. 
--- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/structured_control_flow.cpp | 48 +++++++++++----------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 017c4b8fd..ccdab1dad 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -22,7 +22,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { })}; IR::BlockList blocks(std::ranges::distance(syntax_blocks)); std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.block; }); + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); return blocks; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 836d4b8aa..83554a953 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -655,8 +655,8 @@ public: syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*syntax_list.front().block}; - IR::IREmitter ir{first_block, first_block.begin()}; + IR::Block& first_block{*syntax_list.front().data.block}; + IR::IREmitter ir = IR::IREmitter(first_block, first_block.begin()); ir.Prologue(); } @@ -670,7 +670,7 @@ private: current_block = block_pool.Create(inst_pool); auto& node{syntax_list.emplace_back()}; node.type = IR::AbstractSyntaxNode::Type::Block; - node.block = current_block; + node.data.block = current_block; }}; Tree& tree{parent.children}; for (auto it = tree.begin(); it != tree.end(); ++it) { @@ -713,24 +713,24 @@ private: const size_t then_block_index{syntax_list.size()}; Visit(stmt, break_block, merge_block); - IR::Block* 
const then_block{syntax_list.at(then_block_index).block}; + IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; current_block->AddBranch(then_block); current_block->AddBranch(merge_block); current_block = merge_block; auto& if_node{syntax_list[if_node_index]}; if_node.type = IR::AbstractSyntaxNode::Type::If; - if_node.if_node.cond = cond; - if_node.if_node.body = then_block; - if_node.if_node.merge = merge_block; + if_node.data.if_node.cond = cond; + if_node.data.if_node.body = then_block; + if_node.data.if_node.merge = merge_block; auto& endif_node{syntax_list.emplace_back()}; endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; - endif_node.end_if.merge = merge_block; + endif_node.data.end_if.merge = merge_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = merge_block; + merge.data.block = merge_block; break; } case StatementType::Loop: { @@ -740,7 +740,7 @@ private: } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; - header_node.block = loop_header_block; + header_node.data.block = loop_header_block; IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; @@ -757,7 +757,7 @@ private: const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; ir.DummyReference(cond); - IR::Block* const body_block{syntax_list.at(body_block_index).block}; + IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); continue_block->AddBranch(loop_header_block); @@ -767,23 +767,23 @@ private: auto& loop{syntax_list[loop_node_index]}; loop.type = IR::AbstractSyntaxNode::Type::Loop; - loop.loop.body = body_block; - loop.loop.continue_block = continue_block; - loop.loop.merge = merge_block; + loop.data.loop.body = body_block; + loop.data.loop.continue_block = continue_block; + loop.data.loop.merge = merge_block; auto& 
continue_block_node{syntax_list.emplace_back()}; continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; - continue_block_node.block = continue_block; + continue_block_node.data.block = continue_block; auto& repeat{syntax_list.emplace_back()}; repeat.type = IR::AbstractSyntaxNode::Type::Repeat; - repeat.repeat.cond = cond; - repeat.repeat.loop_header = loop_header_block; - repeat.repeat.merge = merge_block; + repeat.data.repeat.cond = cond; + repeat.data.repeat.loop_header = loop_header_block; + repeat.data.repeat.merge = merge_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = merge_block; + merge.data.block = merge_block; break; } case StatementType::Break: { @@ -799,13 +799,13 @@ private: auto& break_node{syntax_list.emplace_back()}; break_node.type = IR::AbstractSyntaxNode::Type::Break; - break_node.break_node.cond = cond; - break_node.break_node.merge = break_block; - break_node.break_node.skip = skip_block; + break_node.data.break_node.cond = cond; + break_node.data.break_node.merge = break_block; + break_node.data.break_node.skip = skip_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = skip_block; + merge.data.block = skip_block; break; } case StatementType::Return: { @@ -824,7 +824,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = demote_block; + merge.data.block = demote_block; break; } case StatementType::Unreachable: { -- cgit v1.2.3 From ec6fc5fe78c9038fc9ad7259b7b3a7be751ecef6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 02:52:01 -0300 Subject: glasm: Implement TEX and TEXS instructions Remove lod clamp from texture instructions with lod, as this is not needed (nor supported). 
--- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 5 ++--- .../maxwell/translate/impl/texture_fetch_swizzled.cpp | 18 +++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 9671d115e..0046b5edd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -177,14 +177,13 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, const IR::Value sample{[&]() -> IR::Value { if (tex.dc == 0) { if (HasExplicitLod(blod)) { - return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); } else { return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); } } if (HasExplicitLod(blod)) { - return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); } else { return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 3500a4559..154e7f1a1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -81,18 +81,18 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { switch (texs.encoding) { case 0: // 1D.LZ info.type.Assign(TextureType::Color1D); - return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info); + return 
v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); case 1: // 2D info.type.Assign(TextureType::Color2D); return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); case 2: // 2D.LZ info.type.Assign(TextureType::Color2D); - return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); case 3: // 2D.LL CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, - {}, info); + info); case 4: // 2D.DC CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); @@ -105,13 +105,13 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.type.Assign(TextureType::Color2D); info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), - v.F(reg_b + 1), v.F(reg_b), {}, {}, info); + v.F(reg_b + 1), v.F(reg_b), {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), - zero, {}, {}, info); + zero, {}, info); case 7: // ARRAY_2D CheckAlignment(reg_a, 2); info.type.Assign(TextureType::ColorArray2D); @@ -123,7 +123,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.type.Assign(TextureType::ColorArray2D); return v.ir.ImageSampleExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), - zero, {}, {}, info); + zero, {}, info); case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); @@ -131,7 +131,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), - v.F(reg_b + 1), zero, {}, {}, info); + v.F(reg_b 
+ 1), zero, {}, info); case 10: // 3D CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color3D); @@ -141,7 +141,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color3D); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, - {}, info); + info); case 12: // CUBE CheckAlignment(reg_a, 2); info.type.Assign(TextureType::ColorCube); @@ -152,7 +152,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { CheckAlignment(reg_b, 2); info.type.Assign(TextureType::ColorCube); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), - v.F(reg_b + 1), {}, {}, info); + v.F(reg_b + 1), {}, info); default: throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); } -- cgit v1.2.3 From 9bb3e008c9f4bbdd35c095b506c3a3312d17e383 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 02:04:22 -0300 Subject: shader: Read branch conditions from an instruction Fixes the identity removal pass. 
--- .../frontend/maxwell/structured_control_flow.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 83554a953..ebe5c2654 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -703,8 +703,7 @@ private: // Implement if header block IR::IREmitter ir{*current_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; const size_t if_node_index{syntax_list.size()}; syntax_list.emplace_back(); @@ -754,8 +753,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -791,8 +789,7 @@ private: IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; current_block->AddBranch(break_block); current_block->AddBranch(skip_block); current_block = skip_block; -- cgit v1.2.3 From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:51:00 -0300 Subject: shader: Handle host exceptions --- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/translate/translate.cpp | 13 +++++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 
'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp index 12ddf2ac9..ccc40c20c 100644 --- a/src/shader_recompiler/frontend/maxwell/opcodes.cpp +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, +#define INST(name, cute, encode) cute, #include "maxwell.inc" #undef INST }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ccdab1dad..900fc7ab1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/maxwell/program.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 0f4e7a251..8e3c4c5d5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -30,16 +30,21 @@ void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 locat TranslatorVisitor visitor{env, *block}; for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; - const Opcode opcode{Decode(insn)}; - switch (opcode) { + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { #define INST(name, cute, mask) \ case Opcode::name: \ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ break; #include "shader_recompiler/frontend/maxwell/maxwell.inc" #undef OPCODE - default: - throw LogicError("Invalid opcode {}", opcode); + default: + throw LogicError("Invalid 
opcode {}", opcode); + } + } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; } } } -- cgit v1.2.3 From b659212dbdcac6e4f54a4306fd716b7fb74505ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 19:59:22 -0300 Subject: shader: Fix TMML queries --- .../maxwell/translate/impl/texture_mipmap_level.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 2277d24ff..abf87a0df 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -84,9 +84,6 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implmented"); } - - IR::F32 transform_constant{v.ir.Imm32(256.0f)}; - const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; @@ -107,9 +104,16 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::F32 value{v.ir.CompositeExtract(sample, element)}; if (element < 2) { - value = v.ir.FPMul(value, transform_constant); + IR::U32 casted_value; + if (element == 0) { + casted_value = v.ir.ConvertFToU(32, value); + } else { + casted_value = v.ir.ConvertFToS(16, value); + } + v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); + } else { + v.F(dest_reg, value); } - v.F(dest_reg, value); ++dest_reg; } } -- cgit v1.2.3 From d093522fac5f3f4c2c27d30c9ad93421460792a0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:51:32 -0300 Subject: shader: Fix ImageWrite indexing --- .../frontend/maxwell/translate/impl/surface_load_store.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
(limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index e1b8aa8ad..7dc793ad7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -145,7 +145,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { case Type::ARRAY_2D: return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); case Type::_3D: - return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); } throw NotImplementedException("Invalid type {}", type); } -- cgit v1.2.3 From 329dea217d05a47ee00bb005eba1f0fc6b3dd0f6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 19:58:36 -0300 Subject: shader: Always initialize up reference in structure control flow Fixes ubsan issue. 
--- .../frontend/maxwell/structured_control_flow.cpp | 67 ++++++++++++---------- 1 file changed, 36 insertions(+), 31 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index ebe5c2654..c1e0646e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -101,22 +101,24 @@ struct Statement : ListBaseHook { : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} Statement(Break, Statement* cond_, Statement* up_) : cond{cond_}, up{up_}, type{StatementType::Break} {} - Statement(Return) : type{StatementType::Return} {} - Statement(Kill) : type{StatementType::Kill} {} - Statement(Unreachable) : type{StatementType::Unreachable} {} + Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} + Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} + Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} - Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} - Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} - Statement(Or, Statement* op_a_, Statement* op_b_) - : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(Identity, IR::Condition cond_, Statement* up_) + : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) + : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} - 
Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) : branch_offset{branch_offset_}, - branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} - Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} - Statement(IndirectBranchCond, u32 location_) - : location{location_}, type{StatementType::IndirectBranchCond} {} + branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} + Statement(Variable, u32 id_, Statement* up_) + : id{id_}, up{up_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_, Statement* up_) + : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -385,7 +387,7 @@ private: void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, std::vector& gotos, Node function_insert_point, std::optional return_label) { - Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; Tree& root{root_stmt.children}; std::unordered_map local_labels; local_labels.reserve(function.blocks.size()); @@ -411,7 +413,8 @@ private: switch (block.end_class) { case Flow::EndClass::Branch: { - Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + Statement* const always_cond{ + pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; if (block.cond == IR::Condition{true}) { const Node true_label{local_labels.at(block.branch_true)}; gotos.push_back( @@ -423,7 +426,7 @@ private: } else { const Node true_label{local_labels.at(block.branch_true)}; const Node false_label{local_labels.at(block.branch_false)}; - Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; gotos.push_back( 
root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); gotos.push_back(root.insert( @@ -433,14 +436,15 @@ private: } case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, - block.branch_offset)); + block.branch_offset, &root_stmt)); for (const Flow::IndirectBranch& indirect : block.indirect_branches) { const Node indirect_label{local_labels.at(indirect.block)}; - Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; + Statement* cond{ + pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } - root.insert(ip, *pool.Create(Unreachable{})); + root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; @@ -449,16 +453,16 @@ private: break; } case Flow::EndClass::Exit: - root.insert(ip, *pool.Create(Return{})); + root.insert(ip, *pool.Create(Return{}, &root_stmt)); break; case Flow::EndClass::Return: { - Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); break; } case Flow::EndClass::Kill: - root.insert(ip, *pool.Create(Kill{})); + root.insert(ip, *pool.Create(Kill{}, &root_stmt)); break; } } @@ -474,7 +478,7 @@ private: Tree& body{goto_stmt->up->children}; Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); - Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; UpdateTreeUp(if_stmt); body.insert(goto_stmt, *if_stmt); @@ -516,8 
+520,8 @@ private: Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const neg_var{pool.Create(Not{}, variable)}; + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; if (!if_body.empty()) { Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; UpdateTreeUp(if_stmt); @@ -528,7 +532,8 @@ private: switch (label_nested_stmt->type) { case StatementType::If: // Update nested if condition - label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); + label_nested_stmt->cond = + pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); break; case StatementType::Loop: break; @@ -550,7 +555,7 @@ private: Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); SanitizeNoBreaks(loop_body); - Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); body.insert(goto_stmt, *loop_stmt); @@ -577,15 +582,15 @@ private: Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); if_body.pop_front(); - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; UpdateTreeUp(if_stmt); body.insert(goto_stmt, *if_stmt); body.erase(goto_stmt); - Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const 
new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; Tree& parent_tree{parent->up->children}; return parent_tree.insert(std::next(parent), *new_goto); @@ -597,14 +602,14 @@ private: const u32 label_id{goto_stmt->label->id}; Statement* const goto_cond{goto_stmt->cond}; Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; body.insert(goto_stmt, *set_goto_var); body.insert(goto_stmt, *break_stmt); body.erase(goto_stmt); const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; - Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; Tree& parent_tree{loop->up->children}; return parent_tree.insert(std::next(loop), *new_goto); -- cgit v1.2.3 From 5d170de0b5c57afdfc7c633c0b3b36d7ea9299c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:46:07 -0300 Subject: shader: Implement ISCADD32I --- .../maxwell/translate/impl/integer_scaled_add.cpp | 48 ++++++++++++++-------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 93cc2c0b1..044671943 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -8,40 +8,36 @@ namespace Shader::Maxwell { namespace { -void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool 
neg_b, + u64 scale_imm) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> op_a; - BitField<47, 1, u64> cc; - BitField<48, 2, u64> three_for_po; - BitField<48, 1, u64> neg_b; - BitField<49, 1, u64> neg_a; - BitField<39, 5, u64> scale; } const iscadd{insn}; - const bool po{iscadd.three_for_po == 3}; + const bool po{neg_a && neg_b}; IR::U32 op_a{v.X(iscadd.op_a)}; - if (!po) { + if (po) { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); + } else { // When PO is not present, the bits are interpreted as negation - if (iscadd.neg_a != 0) { + if (neg_a) { op_a = v.ir.INeg(op_a); } - if (iscadd.neg_b != 0) { + if (neg_b) { op_b = v.ir.INeg(op_b); } - } else { - // When PO is present, add one - op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); } // With the operands already processed, scale A - const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; + const IR::U32 scale{v.ir.Imm32(static_cast(scale_imm))}; const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; v.X(iscadd.dest_reg, result); - if (iscadd.cc != 0) { + if (cc) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); const IR::U1 carry{v.ir.GetCarryFromOp(result)}; @@ -51,6 +47,18 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); +} + } // Anonymous namespace void TranslatorVisitor::ISCADD_reg(u64 insn) { @@ -65,8 +73,14 @@ void TranslatorVisitor::ISCADD_imm(u64 insn) { ISCADD(*this, insn, GetImm20(insn)); } -void TranslatorVisitor::ISCADD32I(u64) { - throw NotImplementedException("ISCADD32I"); +void TranslatorVisitor::ISCADD32I(u64 insn) { + union { + u64 raw; 
+ BitField<52, 1, u64> cc; + BitField<53, 5, u64> scale; + } const iscadd{insn}; + + return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 05d41fa9b70af6d469f2f6f1474436c9255e9bc3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 23:08:17 -0300 Subject: shader: Add support for "negative" and unaligned offsets "Negative" offsets don't exist. They are shown as such due to a bug in nvdisasm. Unaligned offsets have been proved to read the aligned offset. For example, when reading an U32, if the offset is 6, the offset read will be 4. --- src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 88bbac0a5..b446aae0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -122,14 +122,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { static std::pair CbufAddr(u64 insn) { union { u64 raw; - BitField<20, 14, s64> offset; + BitField<20, 14, u64> offset; BitField<34, 5, u64> binding; } const cbuf{insn}; if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } - if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { + if (cbuf.offset >= 0x10'000) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } const IR::Value binding{static_cast(cbuf.binding)}; -- cgit v1.2.3 From 4f8b68fb0424ccd273107e45709acb6a5c35cecb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:10:15 -0300 Subject: shader: Avoid CPU side undefined behavior on I2F --- .../maxwell/translate/impl/integer_floating_point_conversion.cpp | 2 ++ 1 
file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index d6224d5cc..e0e157275 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -114,6 +114,8 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { IR::U1 is_least; if (src_bitsize == 64) { is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else if (src_bitsize == 32) { + is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits::min())); } else { const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; is_least = v.ir.IEqual(src, least_value); -- cgit v1.2.3 From ec9a78885e6a07b5259c9fbec19d9756443651b1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 18:50:34 -0300 Subject: shader: Add 2D and 3D variants to SUATOM and SURED Used by Claybook. 
--- .../frontend/maxwell/translate/impl/surface_atomic_operations.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 994bdc3eb..44144f154 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -76,6 +76,10 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { case Type::_1D: case Type::BUFFER_1D: return v.X(reg); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); default: break; } -- cgit v1.2.3 From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 Subject: shader: Fix VertexA Shaders. 
--- src/shader_recompiler/frontend/maxwell/program.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 900fc7ab1..8489f9a5f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -171,20 +171,29 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b IR::Program result{}; Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); - std::swap(result.blocks, vertex_a.blocks); - result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); 
Optimization::CollectShaderInfoPass(env_vertex_b, result); -- cgit v1.2.3 From 3c125d41348b08a467333cf0e40bed7ce26cd7cc Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 16:23:04 -0400 Subject: tmml: Remove index component from coords vec The lod query functions exposed by the rendering API's do not make use of the texturearray layer indexing. --- .../frontend/maxwell/translate/impl/texture_mipmap_level.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index abf87a0df..667c69a0d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -46,16 +46,15 @@ Shader::TextureType GetType(TextureType type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + return v.F(reg + 1); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -63,7 +62,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg 
+ 2), v.F(reg + 3), read_array()); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); } throw NotImplementedException("Invalid texture type {}", type); } -- cgit v1.2.3 From 487057b8d2bd79892423ad3a1b5a96d0407b307a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:28:06 -0300 Subject: shader: Comment why the array component is not read in TMML --- .../frontend/maxwell/translate/impl/texture_mipmap_level.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 667c69a0d..aea3c0e62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -46,6 +46,8 @@ Shader::TextureType GetType(TextureType type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + // The ISA reads an array component here, but this is not needed on high level shading languages + // We are dropping this information. switch (type) { case TextureType::_1D: return v.F(reg); -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. 
--- .../frontend/maxwell/structured_control_flow.cpp | 42 +++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c1e0646e6..b2b8c492a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -9,11 +9,13 @@ #include #include #include +#include #include #include +#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -739,8 +741,25 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - current_block->AddBranch(loop_header_block); + const u32 this_loop_id{loop_id++}; + + if (Settings::values.disable_shader_loop_safety_checks) { + if (current_block) { + current_block->AddBranch(loop_header_block); + } + } else { + IR::Block* const init_block{block_pool.Create(inst_pool)}; + IR::IREmitter ir{*init_block}; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + if (current_block) { + current_block->AddBranch(init_block); + } + init_block->AddBranch(loop_header_block); + + auto& init_node{syntax_list.emplace_back()}; + init_node.type = IR::AbstractSyntaxNode::Type::Block; + init_node.data.block = init_block; } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -758,7 +777,16 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const IR::U32 
old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; + const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; + ir.SetLoopSafetyVariable(this_loop_id, new_counter); + + const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; + cond = ir.LogicalAnd(cond, safety_cond); + } + cond = ir.ConditionRef(cond); IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -863,8 +891,14 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - // TODO: Make this constexpr when std::vector is constexpr + u32 loop_id{}; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else const Flow::Block dummy_flow_block; +#endif }; } // Anonymous namespace -- cgit v1.2.3 From 61cd7dd30128633b656ce3264da74bef1ba00bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 02:27:49 -0300 Subject: shader: Add logging --- .../maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 2 +- .../frontend/maxwell/translate/impl/move_special_register.cpp | 8 ++++---- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index edd6220a8..9b85f8059 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -46,7 +46,7 @@ void TranslatorVisitor::ISBERD(u64 insn) { if (isberd.shift != Shift::Default) { throw NotImplementedException("Shift {}", isberd.shift.Value()); } - // LOG_WARNING(..., "ISBERD is 
stubbed"); + LOG_WARNING(Shader, "(STUBBED) called"); X(isberd.dest_reg, X(isberd.src_reg)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index fe3cdfa96..20cb2674e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -118,7 +118,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: - // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); return ir.Imm32(0x00ff'0000); case SpecialRegister::SR_TID: { const IR::Value tid{ir.LocalInvocationId()}; @@ -140,10 +140,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: - // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: - // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); @@ -160,7 +160,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast(ir.YDirection()); case SpecialRegister::SR_AFFINITY: - // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); return ir.Imm32(0); // This is the default value hardware returns. 
default: throw NotImplementedException("S2R special register {}", special_register); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 0793611ff..7ce370f09 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -48,7 +48,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); + LOG_WARNING(Shader, "(STUBBED) called"); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 7ac55c2a750f00b41582a86eba5a44dcd781ae98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 17:00:07 -0300 Subject: shader: Fix loop safety to SSA pass --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index b2b8c492a..605ec38e1 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -750,7 +750,9 @@ private: } else { IR::Block* const init_block{block_pool.Create(inst_pool)}; IR::IREmitter ir{*init_block}; - ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + static constexpr u32 SAFETY_THRESHOLD = 0x2000; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); if (current_block) { current_block->AddBranch(init_block); -- cgit v1.2.3 From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- src/shader_recompiler/frontend/maxwell/program.cpp | 203 --------------------- 
src/shader_recompiler/frontend/maxwell/program.h | 27 --- .../frontend/maxwell/translate_program.cpp | 203 +++++++++++++++++++++ .../frontend/maxwell/translate_program.h | 22 +++ 4 files changed, 225 insertions(+), 230 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.h (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp deleted file mode 100644 index 8489f9a5f..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" -#include "shader_recompiler/frontend/maxwell/translate/translate.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Maxwell { -namespace { -IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); - return blocks; -} - -void RemoveUnreachableBlocks(IR::Program& program) { - // Some blocks might be unreachable if a function call 
exists unconditionally - // If this happens the number of blocks and post order blocks will mismatch - if (program.blocks.size() == program.post_order_blocks.size()) { - return; - } - const auto begin{program.blocks.begin() + 1}; - const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; - program.blocks.erase(std::remove_if(begin, end, pred), end); -} - -void CollectInterpolationInfo(Environment& env, IR::Program& program) { - if (program.stage != Stage::Fragment) { - return; - } - const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - std::optional imap; - for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { - if (value == PixelImap::Unused) { - continue; - } - if (imap && imap != value) { - throw NotImplementedException("Per component interpolation"); - } - imap = value; - } - if (!imap) { - continue; - } - program.info.input_generics[index].interpolation = [&] { - switch (*imap) { - case PixelImap::Unused: - case PixelImap::Perspective: - return Interpolation::Smooth; - case PixelImap::Constant: - return Interpolation::Flat; - case PixelImap::ScreenLinear: - return Interpolation::NoPerspective; - } - throw NotImplementedException("Unknown interpolation {}", *imap); - }(); - } -} - -void AddNVNStorageBuffers(IR::Program& program) { - if (!program.info.uses_global_memory) { - return; - } - const u32 driver_cbuf{0}; - const u32 descriptor_size{0x10}; - const u32 num_buffers{16}; - const u32 base{[&] { - switch (program.stage) { - case Stage::VertexA: - case Stage::VertexB: - return 0x110u; - case Stage::TessellationControl: - return 0x210u; - case Stage::TessellationEval: - return 0x310u; - case Stage::Geometry: - return 0x410u; - case Stage::Fragment: - return 0x510u; - case Stage::Compute: - return 0x310u; - } - throw InvalidArgument("Invalid stage {}", program.stage); - }()}; - auto& 
descs{program.info.storage_buffers_descriptors}; - for (u32 index = 0; index < num_buffers; ++index) { - if (!program.info.nvn_buffer_used[index]) { - continue; - } - const u32 offset{base + index * descriptor_size}; - const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; - if (it != descs.end()) { - it->is_written |= program.info.stores_global_memory; - continue; - } - descs.push_back({ - .cbuf_index = driver_cbuf, - .cbuf_offset = offset, - .count = 1, - .is_written = program.info.stores_global_memory, - }); - } -} -} // Anonymous namespace - -IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { - IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); - program.blocks = GenerateBlocks(program.syntax_list); - program.post_order_blocks = PostOrder(program.syntax_list.front()); - program.stage = env.ShaderStage(); - program.local_memory_size = env.LocalMemorySize(); - switch (program.stage) { - case Stage::TessellationControl: { - const ProgramHeader& sph{env.SPH()}; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Geometry: { - const ProgramHeader& sph{env.SPH()}; - program.output_topology = sph.common3.output_topology; - program.output_vertices = sph.common4.max_output_vertices; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Compute: - program.workgroup_size = env.WorkgroupSize(); - program.shared_memory_size = env.SharedMemorySize(); - break; - default: - break; - } - RemoveUnreachableBlocks(program); - - // Replace instructions before the SSA rewrite - Optimization::LowerFp16ToFp32(program); - - Optimization::SsaRewritePass(program); - - Optimization::GlobalMemoryToStorageBufferPass(program); - Optimization::TexturePass(env, program); - - Optimization::ConstantPropagationPass(program); - Optimization::DeadCodeEliminationPass(program); - 
Optimization::VerificationPass(program); - Optimization::CollectShaderInfoPass(env, program); - CollectInterpolationInfo(env, program); - AddNVNStorageBuffers(program); - return program; -} - -IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b) { - IR::Program result{}; - Optimization::VertexATransformPass(vertex_a); - Optimization::VertexBTransformPass(vertex_b); - for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; - } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); - } - result.blocks = GenerateBlocks(result.syntax_list); - result.post_order_blocks = vertex_b.post_order_blocks; - for (const auto& block : vertex_a.post_order_blocks) { - result.post_order_blocks.push_back(block); - } - result.stage = Stage::VertexB; - result.info = vertex_a.info; - result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; - } - Optimization::JoinTextureInfo(result.info, vertex_b.info); - Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DeadCodeEliminationPass(result); - Optimization::VerificationPass(result); - Optimization::CollectShaderInfoPass(env_vertex_b, result); - return result; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h deleted file mode 100644 index f7f5930e4..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include - -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::Maxwell { - -[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); - -[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..e52170e3e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + 
continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = 
env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + Optimization::LowerFp16ToFp32(program); + + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::VerificationPass(program); + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = 
std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + for (size_t index = 0; index < 32; ++index) { + result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; + result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + } + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + Optimization::VerificationPass(result); + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..1e5536443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); + +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); + +} // namespace Shader::Maxwell -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 8 +++++--- src/shader_recompiler/frontend/maxwell/translate_program.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program 
program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 5250509c1..ed8729fca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -192,7 +192,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + if (vertex_b.info.stores_generics[index]) { + result.info.stores_generics[index] = true; + } } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); -- cgit v1.2.3 From 1091995f8e5ba79d659ab39fe4dbbca26ad01488 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 05:02:19 -0300 Subject: shader: Simplify MergeDualVertexPrograms --- 
src/shader_recompiler/frontend/maxwell/translate_program.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index ed8729fca..e728b43cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -174,14 +174,12 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); result.blocks = GenerateBlocks(result.syntax_list); result.post_order_blocks = vertex_b.post_order_blocks; for (const auto& block : vertex_a.post_order_blocks) { -- cgit v1.2.3 From 1d182fc0f5f8a6facf6e4aebcf79d6d9a092a48c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 21:30:27 -0300 Subject: shader: Calibrate loop safety threshold --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 605ec38e1..0fb870a69 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -751,7 +751,7 @@ private: IR::Block* const init_block{block_pool.Create(inst_pool)}; IR::IREmitter ir{*init_block}; - static constexpr u32 SAFETY_THRESHOLD = 0x2000; + static constexpr u32 SAFETY_THRESHOLD = 0x1000; ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); if (current_block) { -- cgit v1.2.3 From 808ef97a086e7cc58a3ceded1de516ad6a6be5d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 21 Jun 2021 01:07:10 -0300 Subject: shader: Move loop safety tests to code emission --- .../frontend/maxwell/structured_control_flow.cpp | 37 +++------------------- 1 file changed, 4 insertions(+), 33 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 0fb870a69..10d05dc4c 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -15,7 +15,6 @@ #include -#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -663,7 +662,7 @@ public: Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - IR::IREmitter ir = IR::IREmitter(first_block, first_block.begin()); + IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); } @@ -741,27 +740,8 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - const u32 this_loop_id{loop_id++}; - - if (Settings::values.disable_shader_loop_safety_checks) { - if (current_block) { - current_block->AddBranch(loop_header_block); - } - } else { - IR::Block* const init_block{block_pool.Create(inst_pool)}; - IR::IREmitter ir{*init_block}; - - static constexpr u32 
SAFETY_THRESHOLD = 0x1000; - ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); - - if (current_block) { - current_block->AddBranch(init_block); - } - init_block->AddBranch(loop_header_block); - - auto& init_node{syntax_list.emplace_back()}; - init_node.type = IR::AbstractSyntaxNode::Type::Block; - init_node.data.block = init_block; + if (current_block) { + current_block->AddBranch(loop_header_block); } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -779,16 +759,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - if (!Settings::values.disable_shader_loop_safety_checks) { - const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; - const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; - ir.SetLoopSafetyVariable(this_loop_id, new_counter); - - const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; - cond = ir.LogicalAnd(cond, safety_cond); - } - cond = ir.ConditionRef(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); -- cgit v1.2.3 From a7536825dfd3a424ff709995653da4da0ce6dea6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 21 Jun 2021 21:07:52 -0400 Subject: shader_recompiler: Fix IADD3 input partitioning --- .../translate/impl/integer_add_three_input.cpp | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 259a6e6ac..33e2a51ae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -42,14 +42,10 @@ enum class Half : u64 { } } -void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_a; - BitField<31, 2, Half> half_c; - BitField<33, 2, Half> half_b; - BitField<35, 2, Half> half_a; BitField<37, 2, Shift> shift; BitField<47, 1, u64> cc; BitField<48, 1, u64> x; @@ -58,11 +54,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { BitField<51, 1, u64> neg_a; } iadd3{insn}; - IR::U32 op_a{v.X(iadd3.src_a)}; - op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); - op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); - op_c = IntegerHalf(v.ir, op_c, iadd3.half_c); - if (iadd3.neg_a != 0) { op_a = v.ir.INeg(op_a); } @@ -72,7 +63,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { if (iadd3.neg_c != 0) { op_c = v.ir.INeg(op_c); } - IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; @@ -97,15 +87,24 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { } // Anonymous namespace void TranslatorVisitor::IADD3_reg(u64 insn) { - IADD3(*this, insn, GetReg20(insn), GetReg39(insn)); + union { + u64 insn; + BitField<35, 2, Half> half_a; + BitField<31, 2, Half> half_c; + BitField<33, 2, Half> half_b; + } iadd3{insn}; + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; + const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; + const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; + IADD3(*this, insn, op_a, op_b, op_c); } void TranslatorVisitor::IADD3_cbuf(u64 insn) { - IADD3(*this, insn, GetCbuf(insn), GetReg39(insn)); + IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); } void 
TranslatorVisitor::IADD3_imm(u64 insn) { - IADD3(*this, insn, GetImm20(insn), GetReg39(insn)); + IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc..c084f3400 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Wed, 23 Jun 2021 03:31:49 -0300 Subject: shader: Only verify shader when graphics debugging is enabled --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c084f3400..a8b727f1a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -7,6 +7,7 @@ #include #include +#include "common/settings.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" @@ -164,7 +165,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put 
all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- .../frontend/maxwell/translate_program.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= 
vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); -- cgit v1.2.3 From 8612b5fec5d39b904f9fddbbee3e06437d49429c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:42:07 -0300 Subject: shader: Use std::bit_cast instead of Common::BitCast for passthrough --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 6b4b0ce5b..2bb1d24a4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include #include @@ -142,8 +143,8 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + const auto& mask{env.GpPassthroughMask()}; + program.info.passthrough.mask |= ~std::bit_cast>(mask); } break; } -- cgit v1.2.3 From b21bf79bd2627797d87c17f30c776b4e2476f019 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 28 Jun 2021 22:35:31 -0300 Subject: shader: Only apply shift on register mode for IADD3 --- .../translate/impl/integer_add_three_input.cpp | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 33e2a51ae..b50017536 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -22,31 +22,33 
@@ enum class Half : u64 { [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { constexpr bool is_signed{false}; switch (half) { + case Half::All: + return value; case Half::Lower: return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); case Half::Upper: return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); - default: - return value; } + throw NotImplementedException("Invalid half"); } [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { switch (shift) { + case Shift::None: + return value; case Shift::Right: return ir.ShiftRightLogical(value, ir.Imm32(16)); case Shift::Left: return ir.ShiftLeftLogical(value, ir.Imm32(16)); - default: - return value; } + throw NotImplementedException("Invalid shift"); } -void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c) { +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, + Shift shift = Shift::None) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<37, 2, Shift> shift; BitField<47, 1, u64> cc; BitField<48, 1, u64> x; BitField<49, 1, u64> neg_c; @@ -68,7 +70,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; lhs_1 = v.ir.IAdd(lhs_1, carry); } - const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, iadd3.shift)}; + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; v.X(iadd3.dest_reg, result); @@ -89,14 +91,16 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o void TranslatorVisitor::IADD3_reg(u64 insn) { union { u64 insn; + BitField<37, 2, Shift> shift; BitField<35, 2, Half> half_a; - BitField<31, 2, Half> half_c; BitField<33, 2, Half> half_b; - } iadd3{insn}; + BitField<31, 2, Half> half_c; + } const iadd3{insn}; + const auto op_a{IntegerHalf(ir, 
GetReg8(insn), iadd3.half_a)}; const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; - IADD3(*this, insn, op_a, op_b, op_c); + IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); } void TranslatorVisitor::IADD3_cbuf(u64 insn) { -- cgit v1.2.3 From b9069c7891f2516ea037e9355daea284a1d540f1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Jun 2021 22:38:35 -0400 Subject: shader: Account for 33-bit IADD3 scenario --- .../maxwell/translate/impl/integer_add_three_input.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index b50017536..040cfc10f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -36,8 +36,12 @@ enum class Half : u64 { switch (shift) { case Shift::None: return value; - case Shift::Right: - return ir.ShiftRightLogical(value, ir.Imm32(16)); + case Shift::Right: { + // 33-bit RS IADD3 edge case + const IR::U1 edge_case{ir.GetCarryFromOp(value)}; + const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; + return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; + } case Shift::Left: return ir.ShiftLeftLogical(value, ir.Imm32(16)); } @@ -67,6 +71,10 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o } IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { + // TODO: How does RS behave when X is set? 
+ if (shift == Shift::Right) { + throw NotImplementedException("IADD3 X+RS"); + } const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; lhs_1 = v.ir.IAdd(lhs_1, carry); } -- cgit v1.2.3 From 2e5af95541adf581364ee3864be57f9b2b9a230f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 28 Jun 2021 23:44:03 -0400 Subject: shader: GCC fmt 8.0.0 fixes --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 10d05dc4c..06fde0017 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -174,7 +174,7 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->block->begin, stmt->block->end, + stmt->block->begin.Offset(), stmt->block->end.Offset(), reinterpret_cast(stmt->block)); break; case StatementType::Goto: -- cgit v1.2.3 From 2235a51b5d987cf8297211bb1778d75e6b794324 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 01:10:38 -0300 Subject: shader: Manually convert from array to bitset instead of using bit_cast --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 2bb1d24a4..83c77967d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,7 
+3,6 @@ // Refer to the license.txt file included. #include -#include #include #include #include @@ -144,7 +143,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } } break; } -- cgit v1.2.3 From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 - .../frontend/maxwell/structured_control_flow.cpp | 9 ++------- .../translate/impl/atomic_operations_global_memory.cpp | 12 ++++++------ .../translate/impl/integer_floating_point_conversion.cpp | 4 +++- .../frontend/maxwell/translate/impl/load_store_attribute.cpp | 12 ++++++------ .../maxwell/translate/impl/surface_atomic_operations.cpp | 3 --- .../frontend/maxwell/translate/impl/surface_load_store.cpp | 8 ++++---- 7 files changed, 21 insertions(+), 28 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 0e515c3b6..a6bd3e196 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -161,7 +161,6 @@ private: Environment& env; ObjectPool& block_pool; boost::container::small_vector functions; - FunctionId current_function_id{0}; Location program_start; bool exits_to_dispatcher{}; Block* dispatch_block{}; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 
06fde0017..221454b99 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -313,9 +313,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { class GotoPass { public: - explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, - ObjectPool& block_pool_, ObjectPool& stmt_pool) - : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); @@ -616,8 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - ObjectPool& inst_pool; - ObjectPool& block_pool; ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -864,7 +860,6 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - u32 loop_id{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -878,7 +873,7 @@ private: IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; - GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 66f39e44e..d9f999e05 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -59,14 +59,14 @@ IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& 
ir, const IR::U32U64& offset, const IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, AtomSize size) { static constexpr IR::FpControl f16_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::DontCare}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, }; static constexpr IR::FpControl f32_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::FTZ}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, }; switch (op) { case AtomOp::ADD: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e0e157275..0b8119ddd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -104,7 +104,9 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { .rounding = CastFpRounding(i2f.fp_rounding), .fmz_mode = IR::FmzMode::DontCare, }; - auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; + auto value{v.ir.ConvertIToF(static_cast(dst_bitsize), + static_cast(conversion_src_bitsize), is_signed, src, + fp_control)}; if (i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 7d7dcc3cb..924fb7a40 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -80,10 +80,10 @@ void TranslatorVisitor::ALD(u64 insn) { for 
(u32 element = 0; element < num_elements; ++element) { if (ald.patch != 0) { const IR::Patch patch{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetPatch(patch)); + F(ald.dest_reg + static_cast(element), ir.GetPatch(patch)); } else { const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttribute(attr, vertex)); } } return; @@ -92,7 +92,7 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Indirect patch read"); } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -121,10 +121,10 @@ void TranslatorVisitor::AST(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ast.patch != 0) { const IR::Patch patch{offset / 4 + element}; - ir.SetPatch(patch, F(ast.src_reg + element)); + ir.SetPatch(patch, F(ast.src_reg + static_cast(element))); } else { const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + ir.SetAttribute(attr, F(ast.src_reg + static_cast(element)), vertex); } } return; @@ -133,7 +133,7 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Indexed tessellation patch store"); } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast(element)), vertex); }); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 44144f154..63b588ad4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp 
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -69,9 +69,6 @@ TextureType GetType(Type type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { - const auto array{[&](int index) { - return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); - }}; switch (type) { case Type::_1D: case Type::BUFFER_1D: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 7dc793ad7..681220a8d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -160,10 +160,10 @@ unsigned SwizzleMask(u64 swizzle) { IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { std::array colors; for (int i = 0; i < num_regs; ++i) { - colors[i] = ir.GetReg(reg + i); + colors[static_cast(i)] = ir.GetReg(reg + i); } for (int i = num_regs; i < 4; ++i) { - colors[i] = ir.Imm32(0); + colors[static_cast(i)] = ir.Imm32(0); } return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); } @@ -211,12 +211,12 @@ void TranslatorVisitor::SULD(u64 insn) { if (is_typed) { const int num_regs{SizeInRegs(suld.size)}; for (int i = 0; i < num_regs; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } else { const unsigned mask{SwizzleMask(suld.swizzle)}; const int bits{std::popcount(mask)}; - if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) { + if (!IR::IsAligned(dest_reg, bits == 3 ? 
4 : static_cast(bits))) { throw NotImplementedException("Unaligned destination register"); } for (unsigned component = 0; component < 4; ++component) { -- cgit v1.2.3 From bf2956d77ab0ad06c4b5505cc9906e51e5878274 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Jul 2021 05:22:01 -0300 Subject: shader: Avoid usage of C++20 ranges to build in clang --- .../frontend/maxwell/control_flow.cpp | 13 ++++++------- .../frontend/maxwell/structured_control_flow.cpp | 8 ++++---- .../frontend/maxwell/translate_program.cpp | 20 +++++++++++++------- 3 files changed, 23 insertions(+), 18 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e7abea82f..1a954a509 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -151,18 +150,18 @@ std::pair Stack::Pop(Token token) const { } std::optional Stack::Peek(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - if (it == reverse_entries.end()) { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { return std::nullopt; } return it->target; } Stack Stack::Remove(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - const auto pos{std::distance(reverse_entries.begin(), it)}; + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; Stack result; result.entries.insert(result.entries.end(), 
entries.begin(), entries.end() - pos - 1); return result; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 221454b99..8b3e0a15c 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -167,7 +166,7 @@ std::string DumpExpr(const Statement* stmt) { } } -std::string DumpTree(const Tree& tree, u32 indentation = 0) { +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { std::string ret; std::string indent(indentation, ' '); for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { @@ -315,8 +314,9 @@ class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); } } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 83c77967d..c067d459c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include "common/settings.h" @@ -20,12 +19,19 @@ namespace Shader::Maxwell { namespace { IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return 
node.data.block; }); + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } return blocks; } -- cgit v1.2.3 From fc7bed21b539aac4fdde74a41217066eaf8ed3f9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 19:56:14 -0400 Subject: shader: Implement ISETP.X --- .../maxwell/translate/impl/common_funcs.cpp | 43 ++++++++++++++++++++++ .../frontend/maxwell/translate/impl/common_funcs.h | 4 ++ .../translate/impl/integer_compare_and_set.cpp | 43 ---------------------- .../translate/impl/integer_set_predicate.cpp | 11 +++++- 4 files changed, 57 insertions(+), 44 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 10bb01d99..20458d2ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -29,6 +29,49 @@ IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32 } } +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? 
ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f584060b3..214d0af3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -11,6 +11,10 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed); +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 34fa7345c..8ce1aee04 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -9,49 +9,6 @@ namespace Shader::Maxwell { namespace { -IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, - CompareOp compare_op, bool is_signed) { - const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; - const IR::U1 z_flag{ir.GetZFlag()}; - const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; - const IR::U1 flip_logic{is_signed ? 
ir.Imm1(false) - : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), - ir.ILessThan(operand_2, zero, true))}; - switch (compare_op) { - case CompareOp::False: - return ir.Imm1(false); - case CompareOp::LessThan: - return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - case CompareOp::Equal: - return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); - case CompareOp::LessThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::GreaterThan: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), - ir.IGreaterThan(intermediate, zero, true))}; - const IR::U1 not_z{ir.LogicalNot(z_flag)}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); - } - case CompareOp::NotEqual: - return ir.LogicalOr(ir.INotEqual(intermediate, zero), - ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); - case CompareOp::GreaterThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), - ir.IGreaterThanEqual(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::True: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed, bool x) { return x ? 
ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 7743701d0..bee10e5b9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -9,6 +9,12 @@ namespace Shader::Maxwell { namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -17,15 +23,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; const BooleanOp bop{isetp.bop}; const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; -- cgit v1.2.3 From 7f13104c1778cfdfd54350e92603164070781124 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 18:28:36 -0300 
Subject: shader: Support out of bound local memory reads and immediate writes Support ignoring immediate out of bound writes. Writing dynamically out of bounds is not yet supported (e.g. R0+0x4). Reading out of bounds yields zero. This is supported by checking the size from the IR; if the input is immediate, the optimization passes will drop it. --- .../translate/impl/load_store_local_shared.cpp | 25 ++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 20df163f2..d2a1dbf61 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -85,21 +85,28 @@ IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); } + +IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { + const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; + const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; + return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; +} } // Anonymous namespace void TranslatorVisitor::LDL(u64 insn) { const auto [word_offset, offset]{WordOffset(*this, insn)}; + const IR::U32 word{LoadLocal(*this, word_offset, offset)}; const IR::Reg dest{Reg(insn)}; const auto [bit_size, is_signed]{GetSize(insn)}; switch (bit_size) { case 8: { const IR::U32 bit{ByteOffset(ir, offset)}; - X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); break; } case 16: { const
IR::U32 bit{ShortOffset(ir, offset)}; - X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); break; } case 32: @@ -108,9 +115,11 @@ void TranslatorVisitor::LDL(u64 insn) { if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } - X(dest, ir.LoadLocal(word_offset)); + X(dest, word); for (int i = 1; i < bit_size / 32; ++i) { - X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; + const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; + X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); } break; } @@ -141,6 +150,14 @@ void TranslatorVisitor::LDS(u64 insn) { void TranslatorVisitor::STL(u64 insn) { const auto [word_offset, offset]{WordOffset(*this, insn)}; + if (offset.IsImmediate()) { + // TODO: Support storing out of bounds at runtime + if (offset.U32() >= env.LocalMemorySize()) { + LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", + offset.U32(), env.LocalMemorySize()); + return; + } + } const IR::Reg reg{Reg(insn)}; const IR::U32 src{X(reg)}; const int bit_size{GetSize(insn).first}; -- cgit v1.2.3