From b032ebdfee1928c4458eaf15faa0cff299371e65 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 29 May 2020 14:53:27 +1000 Subject: Implement macro JIT --- src/video_core/macro/macro.cpp | 45 ++ src/video_core/macro/macro.h | 128 ++++++ src/video_core/macro/macro_interpreter.cpp | 288 +++++++++++++ src/video_core/macro/macro_interpreter.h | 102 +++++ src/video_core/macro/macro_jit_x64.cpp | 633 +++++++++++++++++++++++++++++ src/video_core/macro/macro_jit_x64.h | 98 +++++ 6 files changed, 1294 insertions(+) create mode 100644 src/video_core/macro/macro.cpp create mode 100644 src/video_core/macro/macro.h create mode 100644 src/video_core/macro/macro_interpreter.cpp create mode 100644 src/video_core/macro/macro_interpreter.h create mode 100644 src/video_core/macro/macro_jit_x64.cpp create mode 100644 src/video_core/macro/macro_jit_x64.h (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp new file mode 100644 index 000000000..85a6e5dd4 --- /dev/null +++ b/src/video_core/macro/macro.cpp @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/settings.h" +#include "video_core/macro/macro.h" +#include "video_core/macro/macro_interpreter.h" +#include "video_core/macro/macro_jit_x64.h" + +namespace Tegra { + +void MacroEngine::AddCode(u32 method, u32 data) { + uploaded_macro_code[method].push_back(data); +} + +void MacroEngine::Execute(u32 method, std::vector parameters) { + auto compiled_macro = macro_cache.find(method); + if (compiled_macro != macro_cache.end()) { + compiled_macro->second->Execute(parameters, method); + } else { + // Macro not compiled, check if it's uploaded and if so, compile it + auto macro_code = uploaded_macro_code.find(method); + if (macro_code == uploaded_macro_code.end()) { + UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); + return; + } + macro_cache[method] = Compile(macro_code->second); + macro_cache[method]->Execute(parameters, method); + } +} + +std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d) { + if (Settings::values.disable_macro_jit) { + return std::make_unique(maxwell3d); + } +#ifdef ARCHITECTURE_x86_64 + return std::make_unique(maxwell3d); +#else + return std::make_unique(maxwell3d); +#endif +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h new file mode 100644 index 000000000..28ca243d1 --- /dev/null +++ b/src/video_core/macro/macro.h @@ -0,0 +1,128 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Tegra { +namespace Engines { +class Maxwell3D; +} +namespace Macro { +constexpr std::size_t NUM_MACRO_REGISTERS = 8; +enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, +}; + +enum class ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 +}; + +enum class ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 +}; + +enum class BranchCondition : u32 { + Zero = 0, + NotZero = 1, +}; + +union Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + // If set on a branch, then the branch doesn't have a delay slot. + BitField<5, 1, u32> branch_annul; + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + + s32 GetBranchTarget() const { + return static_cast(immediate * sizeof(u32)); + } +}; + +union MethodAddress { + u32 raw; + BitField<0, 12, u32> address; + BitField<12, 6, u32> increment; +}; + +} // namespace Macro + +class CachedMacro { +public: + virtual ~CachedMacro() = default; + /** + * Executes the macro code with the specified input parameters. + * @param code The macro byte code to execute + * @param parameters The parameters of the macro + */ + virtual void Execute(std::vector& parameters, u32 method) = 0; +}; + +class MacroEngine { +public: + virtual ~MacroEngine() = default; + + // Store the uploaded macro code to compile them when they're called. + void AddCode(u32 method, u32 data); + + // Compiles the macro if its not in the cache, and executes the compiled macro + void Execute(u32 method, std::vector parameters); + +protected: + virtual std::unique_ptr Compile(const std::vector& code) = 0; + +private: + std::unordered_map> macro_cache; + std::unordered_map> uploaded_macro_code; +}; + +std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d); + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp new file mode 100644 index 000000000..e63296a21 --- /dev/null +++ b/src/video_core/macro/macro_interpreter.cpp @@ -0,0 +1,288 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_interpreter.h" + +MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); + +namespace Tegra { +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + +std::unique_ptr MacroInterpreter::Compile(const std::vector& code) { + return std::make_unique(maxwell3d, code); +} + +MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, + const std::vector& code) + : maxwell3d(maxwell3d), code(code) {} + +void MacroInterpreterImpl::Execute(std::vector& parameters, u32 method) { + MICROPROFILE_SCOPE(MacroInterp); + Reset(); + + registers[1] = parameters[0]; + num_parameters = parameters.size(); + + if (num_parameters > parameters_capacity) { + parameters_capacity = num_parameters; + this->parameters = std::make_unique(num_parameters); + } + std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); + this->num_parameters = num_parameters; + + // Execute the code until we hit an exit condition. + bool keep_executing = true; + while (keep_executing) { + keep_executing = Step(false); + } + + // Assert the the macro used all the input parameters + ASSERT(next_parameter_index == num_parameters); +} + +void MacroInterpreterImpl::Reset() { + registers = {}; + pc = 0; + delayed_pc = {}; + method_address.raw = 0; + num_parameters = 0; + // The next parameter index starts at 1, because $r1 already has the value of the first + // parameter. + next_parameter_index = 1; + carry_flag = false; +} + +bool MacroInterpreterImpl::Step(bool is_delay_slot) { + u32 base_address = pc; + + Macro::Opcode opcode = GetOpcode(); + pc += 4; + + // Update the program counter if we were delayed + if (delayed_pc) { + ASSERT(is_delay_slot); + pc = *delayed_pc; + delayed_pc = {}; + } + + switch (opcode.operation) { + case Macro::Operation::ALU: { + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), + GetRegister(opcode.src_b)); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::AddImmediate: { + ProcessResult(opcode.result_operation, opcode.dst, + GetRegister(opcode.src_a) + opcode.immediate); + break; + } + case Macro::Operation::ExtractInsert: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); + dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + dst |= src << opcode.bf_dst_bit; + ProcessResult(opcode.result_operation, opcode.dst, dst); + break; + } + case Macro::Operation::ExtractShiftLeftImmediate: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::ExtractShiftLeftRegister: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::Read: { + u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::Branch: { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + u32 value = GetRegister(opcode.src_a); + bool taken = EvaluateBranchCondition(opcode.branch_condition, value); + if (taken) { + // Ignore the delay slot if the branch has the annul bit. + if (opcode.branch_annul) { + pc = base_address + opcode.GetBranchTarget(); + return true; + } + + delayed_pc = base_address + opcode.GetBranchTarget(); + // Execute one more instruction due to the delay slot. + return Step(true); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", + static_cast(opcode.operation.Value())); + } + + // An instruction with the Exit flag will not actually + // cause an exit if it's executed inside a delay slot. + if (opcode.is_exit && !is_delay_slot) { + // Exit has a delay slot, execute the next instruction + Step(true); + return false; + } + + return true; +} + +u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { + switch (operation) { + case Macro::ALUOperation::Add: { + const u64 result{static_cast(src_a) + src_b}; + carry_flag = result > 0xffffffff; + return static_cast(result); + } + case Macro::ALUOperation::AddWithCarry: { + const u64 result{static_cast(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; + carry_flag = result > 0xffffffff; + return static_cast(result); + } + case Macro::ALUOperation::Subtract: { + const u64 result{static_cast(src_a) - src_b}; + carry_flag = result < 0x100000000; + return static_cast(result); + } + case Macro::ALUOperation::SubtractWithBorrow: { + const u64 result{static_cast(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; + carry_flag = result < 0x100000000; + return static_cast(result); + } + case Macro::ALUOperation::Xor: + return src_a ^ src_b; + case Macro::ALUOperation::Or: + return src_a | src_b; + case Macro::ALUOperation::And: + return src_a & src_b; + case Macro::ALUOperation::AndNot: + return src_a & ~src_b; + case Macro::ALUOperation::Nand: + return ~(src_a & src_b); + + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast(operation)); + return 0; + } +} + +void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + // Fetch parameter and ignore result. + SetRegister(reg, FetchParameter()); + break; + case Macro::ResultOperation::Move: + // Move result. + SetRegister(reg, result); + break; + case Macro::ResultOperation::MoveAndSetMethod: + // Move result and use as Method Address. + SetRegister(reg, result); + SetMethodAddress(result); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, FetchParameter()); + Send(result); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, result); + Send(result); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, FetchParameter()); + SetMethodAddress(result); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, result); + SetMethodAddress(result); + Send(FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, result); + SetMethodAddress(result); + Send((result >> 12) & 0b111111); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast(operation)); + } +} + +bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { + switch (cond) { + case Macro::BranchCondition::Zero: + return value == 0; + case Macro::BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); + return true; +} + +Macro::Opcode MacroInterpreterImpl::GetOpcode() const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; +} + +u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { + return registers.at(register_id); +} + +void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { + // Register 0 is hardwired as the zero register. + // Ensure no writes to it actually occur. + if (register_id == 0) { + return; + } + + registers.at(register_id) = value; +} + +void MacroInterpreterImpl::SetMethodAddress(u32 address) { + method_address.raw = address; +} + +void MacroInterpreterImpl::Send(u32 value) { + maxwell3d.CallMethodFromMME(method_address.address, value); + // Increment the method address by the method increment. + method_address.address.Assign(method_address.address.Value() + + method_address.increment.Value()); +} + +u32 MacroInterpreterImpl::Read(u32 method) const { + return maxwell3d.GetRegisterValue(method); +} + +u32 MacroInterpreterImpl::FetchParameter() { + ASSERT(next_parameter_index < num_parameters); + return parameters[next_parameter_index++]; +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h new file mode 100644 index 000000000..fb923f7b9 --- /dev/null +++ b/src/video_core/macro/macro_interpreter.h @@ -0,0 +1,102 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include +#include +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { +namespace Engines { +class Maxwell3D; +} + +class MacroInterpreter final : public MacroEngine { +public: + explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); + +protected: + std::unique_ptr Compile(const std::vector& code) override; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class MacroInterpreterImpl : public CachedMacro { +public: + MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector& code); + void Execute(std::vector& parameters, u32 method) override; + +private: + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * @param offset Offset to start execution at. + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Macro::Opcode GetOpcode() const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); + + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + /// Current program counter + u32 pc; + /// Program counter to execute at after the delay slot is executed. + std::optional delayed_pc; + + /// General purpose macro registers. + std::array registers = {}; + + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::unique_ptr parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; + + bool carry_flag = false; + const std::vector& code; +}; + +} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp new file mode 100644 index 000000000..1b657236a --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -0,0 +1,633 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/x64/xbyak_util.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_interpreter.h" +#include "video_core/macro/macro_jit_x64.h" + +MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47)); +MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); + +namespace Tegra { +using JitFunction = void (MacroJITx64Impl::*)(Macro::Opcode opcode); +const std::array InstructionTable{ + &MacroJITx64Impl::Compile_ALU, + &MacroJITx64Impl::Compile_AddImmediate, + &MacroJITx64Impl::Compile_ExtractInsert, + &MacroJITx64Impl::Compile_ExtractShiftLeftImmediate, + &MacroJITx64Impl::Compile_ExtractShiftLeftRegister, + &MacroJITx64Impl::Compile_Read, + nullptr, + &MacroJITx64Impl::Compile_Branch, +}; + +static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; +static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; +static const Xbyak::Reg64 STATE = Xbyak::util::r11; +static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; +static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; +static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; +static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; +static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; + +static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ + PARAMETERS, + REGISTERS, + STATE, + NEXT_PARAMETER, + RESULT, + METHOD_ADDRESS, + BRANCH_HOLDER, +}); + +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + +std::unique_ptr MacroJITx64::Compile(const std::vector& code) { + return std::make_unique(maxwell3d, code); +} + +MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code) + : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { + Compile(); +} + +MacroJITx64Impl::~MacroJITx64Impl() = default; + +void MacroJITx64Impl::Execute(std::vector& parameters, u32 method) { + MICROPROFILE_SCOPE(MacroJitExecute); + ASSERT_OR_EXECUTE(program != nullptr, { return; }); + JITState state{}; + state.maxwell3d = &maxwell3d; + state.registers = {}; + state.parameters = parameters.data(); + program(&state); +} + +void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { + const bool is_a_zero = opcode.src_a == 0; + const bool is_b_zero = opcode.src_b == 0; + const bool valid_operation = !is_a_zero && !is_b_zero; + const bool is_move_operation = !is_a_zero && is_b_zero; + const bool has_zero_register = is_a_zero || is_b_zero; + + Xbyak::Reg64 src_a; + Xbyak::Reg32 src_b; + + if (!optimizer.zero_reg_skip) { + src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_b = Compile_GetRegister(opcode.src_b, ebx); + } else { + if (!is_a_zero) { + src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + } + if (!is_b_zero) { + src_b = Compile_GetRegister(opcode.src_b, ebx); + } + } + Xbyak::Label skip_carry{}; + + bool has_emitted = false; + + switch (opcode.alu_operation) { + case Macro::ALUOperation::Add: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + add(src_a, src_b); + } + } else { + add(src_a, src_b); + } + + if (!optimizer.can_skip_carry) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::AddWithCarry: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + adc(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Subtract: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + sub(src_a, src_b); + has_emitted = true; + } + } else { + sub(src_a, src_b); + has_emitted = true; + } + if (!optimizer.can_skip_carry && has_emitted) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::SubtractWithBorrow: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + sbb(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Xor: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + xor_(src_a, src_b); + } + } else { + xor_(src_a, src_b); + } + break; + case Macro::ALUOperation::Or: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + or_(src_a, src_b); + } + } else { + or_(src_a, src_b); + } + break; + case Macro::ALUOperation::And: + if (optimizer.zero_reg_skip) { + if (!has_zero_register) { + and_(src_a, src_b); + } + } else { + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::AndNot: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + not_(src_b); + and_(src_a, src_b); + } + } else { + not_(src_b); + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::Nand: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + and_(src_a, src_b); + not_(src_a); + } + } else { + and_(src_a, src_b); + not_(src_a); + } + break; + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", + static_cast(opcode.alu_operation.Value())); + break; + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { + if (optimizer.skip_dummy_addimmediate) { + // Games tend to use this as an exit instruction placeholder. It's to encode an instruction + // without doing anything. In our case we can just not emit anything. + if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { + return; + } + } + // Check for redundant moves + if (optimizer.optimize_for_method_move && + opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next_opcode.has_value()) { + const auto next = *next_opcode; + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + return; + } + } + } + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, RESULT); + auto src = Compile_GetRegister(opcode.src_b, eax); + + if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { + shr(src, opcode.bf_src_bit); + } else if (opcode.bf_src_bit == 31) { + xor_(src, src); + } + // Don't bother masking the whole register since we're using a 32 bit register + if (opcode.bf_size != 31 && opcode.bf_size != 0) { + and_(src, opcode.GetBitfieldMask()); + } else if (opcode.bf_size == 0) { + xor_(src, src); + } + if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { + shl(src, opcode.bf_dst_bit); + } else if (opcode.bf_dst_bit == 31) { + xor_(src, src); + } + + const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + if (mask != 0xffffffff) { + and_(dst, mask); + } + or_(dst, src); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, eax); + auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, al); + if (opcode.bf_size != 0 && opcode.bf_size != 31) { + and_(src, opcode.GetBitfieldMask()); + } else if (opcode.bf_size == 0) { + xor_(src, src); + } + + if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { + shl(src, opcode.bf_dst_bit); + } else if (opcode.bf_dst_bit == 31) { + xor_(src, src); + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, eax); + auto src = Compile_GetRegister(opcode.src_b, RESULT); + + if (opcode.bf_src_bit != 0) { + shr(src, opcode.bf_src_bit); + } + + if (opcode.bf_size != 31) { + and_(src, opcode.GetBitfieldMask()); + } + shl(src, al); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { + return maxwell3d->GetRegisterValue(method); +} + +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + +void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2, RESULT); + Common::X64::CallFarFunction(*this, &Read); + Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + mov(RESULT, Common::X64::ABI_RETURN.cvt32()); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { + Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); + mov(Common::X64::ABI_PARAM3, value); + Common::X64::CallFarFunction(*this, &Send); + Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + + Xbyak::Label dont_process{}; + // Get increment + test(METHOD_ADDRESS, 0x3f000); + // If zero, method address doesn't update + je(dont_process); + + mov(ecx, METHOD_ADDRESS); + and_(METHOD_ADDRESS, 0xfff); + shr(ecx, 12); + and_(ecx, 0x3f); + lea(eax, ptr[rcx + METHOD_ADDRESS_64]); + sal(ecx, 12); + or_(eax, ecx); + + mov(METHOD_ADDRESS, eax); + + L(dont_process); +} + +void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + const s32 jump_address = + static_cast(pc) + static_cast(opcode.GetBranchTarget() / sizeof(s32)); + + Xbyak::Label end; + auto value = Compile_GetRegister(opcode.src_a, eax); + test(value, value); + if (optimizer.has_delayed_pc) { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + jne(end, T_NEAR); + break; + case Macro::BranchCondition::NotZero: + je(end, T_NEAR); + break; + } + + if (opcode.branch_annul) { + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } else { + Xbyak::Label handle_post_exit{}; + Xbyak::Label skip{}; + jmp(skip, T_NEAR); + if (opcode.is_exit) { + L(handle_post_exit); + // Execute 1 instruction + mov(BRANCH_HOLDER, end_of_code); + // Jump to next instruction to skip delay slot check + jmp(labels[jump_address], T_NEAR); + } else { + L(handle_post_exit); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } + L(skip); + mov(BRANCH_HOLDER, handle_post_exit); + jmp(delay_skip[pc], T_NEAR); + } + } else { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + je(labels[jump_address], T_NEAR); + break; + case Macro::BranchCondition::NotZero: + jne(labels[jump_address], T_NEAR); + break; + } + } + + L(end); +} + +void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { + optimizer.can_skip_carry = true; + optimizer.has_delayed_pc = false; + for (auto raw_op : code) { + Macro::Opcode op{}; + op.raw = raw_op; + + if (op.operation == Macro::Operation::ALU) { + // Scan for any ALU operations which actually use the carry flag, if they don't exist in + // our current code we can skip emitting the carry flag handling operations + if (op.alu_operation == Macro::ALUOperation::AddWithCarry || + op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { + optimizer.can_skip_carry = false; + } + } + + if (op.operation == Macro::Operation::Branch) { + if (!op.branch_annul) { + optimizer.has_delayed_pc = true; + } + } + } +} + +void MacroJITx64Impl::Compile() { + MICROPROFILE_SCOPE(MacroJitCompile); + bool keep_executing = true; + labels.fill(Xbyak::Label()); + + Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + // JIT state + mov(STATE, Common::X64::ABI_PARAM1); + mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + + static_cast(offsetof(JITState, parameters))]); + mov(REGISTERS, Common::X64::ABI_PARAM1); + add(REGISTERS, static_cast(offsetof(JITState, registers))); + xor_(RESULT, RESULT); + xor_(METHOD_ADDRESS, METHOD_ADDRESS); + xor_(NEXT_PARAMETER, NEXT_PARAMETER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + + mov(dword[REGISTERS + 4], Compile_FetchParameter()); + + // Track get register for zero registers and mark it as no-op + optimizer.zero_reg_skip = true; + + // AddImmediate tends to be used as a NOP instruction, if we detect this we can + // completely skip the entire code path and no emit anything + optimizer.skip_dummy_addimmediate = true; + + // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting + // one if our register isn't "dirty" + optimizer.optimize_for_method_move = true; + + // Check to see if we can skip emitting certain instructions + Optimizer_ScanFlags(); + + const u32 op_count = static_cast(code.size()); + for (u32 i = 0; i < op_count; i++) { + if (i < op_count - 1) { + pc = i + 1; + next_opcode = GetOpCode(); + } else { + next_opcode = {}; + } + pc = i; + Compile_NextInstruction(); + } + + L(end_of_code); + + Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + ret(); + ready(); + program = getCode(); +} + +bool MacroJITx64Impl::Compile_NextInstruction() { + const auto opcode = GetOpCode(); + if (labels[pc].getAddress()) { + return false; + } + + L(labels[pc]); + + const std::size_t op = static_cast(opcode.operation.Value()); + + if (InstructionTable[op] == nullptr) { + UNIMPLEMENTED_MSG("Unimplemented opcode {}", op); + } else { + ((*this).*InstructionTable[op])(opcode); + } + + if (optimizer.has_delayed_pc) { + if (opcode.is_exit) { + mov(rax, end_of_code); + test(BRANCH_HOLDER, BRANCH_HOLDER); + cmove(BRANCH_HOLDER, rax); + // Jump to next instruction to skip delay slot check + je(labels[pc + 1], T_NEAR); + } else { + // TODO(ogniK): Optimize delay slot branching + Xbyak::Label no_delay_slot{}; + test(BRANCH_HOLDER, BRANCH_HOLDER); + je(no_delay_slot, T_NEAR); + mov(rax, BRANCH_HOLDER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(rax); + L(no_delay_slot); + } + L(delay_skip[pc]); + if (opcode.is_exit) { + return false; + } + } else { + test(BRANCH_HOLDER, BRANCH_HOLDER); + jne(end_of_code, T_NEAR); + if (opcode.is_exit) { + inc(BRANCH_HOLDER); + return false; + } + } + return true; +} + +Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { + mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); + inc(NEXT_PARAMETER); + return eax; +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[REGISTERS + index * sizeof(u32)]); + } + + return dst; +} + +Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[REGISTERS + index * sizeof(u32)]); + } + + return dst; +} + +void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { + Xbyak::Label zero{}, end{}; + xor_(ecx, ecx); + shr(dst, 32); + setne(cl); + mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); +} + +void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { + auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. + if (reg == 0) { + return; + } + mov(dword[REGISTERS + reg * sizeof(u32)], result); + }; + auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; + + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + SetRegister(reg, Compile_FetchParameter()); + break; + case Macro::ResultOperation::Move: + SetRegister(reg, RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethod: + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, Compile_FetchParameter()); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, RESULT); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, Compile_FetchParameter()); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + Compile_Send(Compile_FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + shr(RESULT, 12); + and_(RESULT, 0b111111); + Compile_Send(RESULT); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast(operation)); + } +} + +Macro::Opcode MacroJITx64Impl::GetOpCode() const { + ASSERT(pc < code.size()); + return {code[pc]}; +} + +std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h new file mode 100644 index 000000000..71cd6a3b0 --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.h @@ -0,0 +1,98 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/bit_field.h" +#include "common/common_types.h" +#include "common/x64/xbyak_abi.h" +#include "video_core/macro/macro.h" + +namespace Tegra { +namespace Engines { +class Maxwell3D; +} + +/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games +constexpr size_t MAX_CODE_SIZE = 0x10000; + +class MacroJITx64 final : public MacroEngine { +public: + explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); + +protected: + std::unique_ptr Compile(const std::vector& code) override; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { +public: + MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code); + ~MacroJITx64Impl(); + void Execute(std::vector& parameters, u32 method) override; + + void Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); + void Compile_WriteCarry(Xbyak::Reg64 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); + + Macro::Opcode GetOpCode() const; + std::bitset<32> PersistentCallerSavedRegs() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array registers{}; + u32* parameters{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + }; + OptimizerState optimizer{}; + + std::optional next_opcode{}; + ProgramType program{nullptr}; + + std::array labels; + std::array delay_skip{}; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + std::optional delayed_pc; + + const std::vector& code; + Engines::Maxwell3D& maxwell3d; +}; + +} // namespace Tegra -- cgit v1.2.3 From 8118ea160b194fbcc600c44bff3be556b249c780 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Sat, 30 May 2020 12:23:58 +1000 Subject: Favor switch case over jump table Easier to read and will emit a jump table automatically. --- src/video_core/macro/macro_jit_x64.cpp | 43 ++++++++++++++++++++-------------- src/video_core/macro/macro_jit_x64.h | 1 + 2 files changed, 26 insertions(+), 18 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1b657236a..48501e582 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,18 +14,6 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -using JitFunction = void (MacroJITx64Impl::*)(Macro::Opcode opcode); -const std::array InstructionTable{ - &MacroJITx64Impl::Compile_ALU, - &MacroJITx64Impl::Compile_AddImmediate, - &MacroJITx64Impl::Compile_ExtractInsert, - &MacroJITx64Impl::Compile_ExtractShiftLeftImmediate, - &MacroJITx64Impl::Compile_ExtractShiftLeftRegister, - &MacroJITx64Impl::Compile_Read, - nullptr, - &MacroJITx64Impl::Compile_Branch, -}; - static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; @@ -489,12 +477,31 @@ bool MacroJITx64Impl::Compile_NextInstruction() { L(labels[pc]); - const std::size_t op = static_cast(opcode.operation.Value()); - - if (InstructionTable[op] == nullptr) { - UNIMPLEMENTED_MSG("Unimplemented opcode {}", op); - } else { - ((*this).*InstructionTable[op])(opcode); + switch (opcode.operation) { + case Macro::Operation::ALU: + Compile_ALU(opcode); + break; + case Macro::Operation::AddImmediate: + Compile_AddImmediate(opcode); + break; + case Macro::Operation::ExtractInsert: + Compile_ExtractInsert(opcode); + break; + case Macro::Operation::ExtractShiftLeftImmediate: + Compile_ExtractShiftLeftImmediate(opcode); + break; + case Macro::Operation::ExtractShiftLeftRegister: + Compile_ExtractShiftLeftRegister(opcode); + break; + case Macro::Operation::Read: + Compile_Read(opcode); + break; + case Macro::Operation::Branch: + Compile_Branch(opcode); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); + break; } if (optimizer.has_delayed_pc) { diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 71cd6a3b0..729ed7713 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -35,6 +35,7 @@ class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { public: MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code); ~MacroJITx64Impl(); + void Execute(std::vector& parameters, u32 method) override; void Compile_ALU(Macro::Opcode opcode); -- cgit v1.2.3 From 3a20e74f4056a03253e563510048e99ea548d5b4 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Tue, 2 Jun 2020 16:37:06 +1000 Subject: Pass by reference instead of copying parameters --- src/video_core/macro/macro.cpp | 2 +- src/video_core/macro/macro.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 85a6e5dd4..d05a88479 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -15,7 +15,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(u32 method, std::vector parameters) { +void MacroEngine::Execute(u32 method, std::vector& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { compiled_macro->second->Execute(parameters, method); diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 28ca243d1..49fc4d6bc 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -113,7 +113,7 @@ public: void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, std::vector parameters); + void Execute(u32 method, std::vector& parameters); protected: virtual std::unique_ptr Compile(const std::vector& code) = 0; -- cgit v1.2.3 From 411f5527d41ba5c4f09b914b4fb4df0c6493f744 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Wed, 3 Jun 2020 16:33:38 +1000 Subject: Mark parameters as const --- src/video_core/macro/macro.cpp | 2 +- src/video_core/macro/macro.h | 4 ++-- src/video_core/macro/macro_interpreter.cpp | 2 +- src/video_core/macro/macro_interpreter.h | 2 +- src/video_core/macro/macro_jit_x64.cpp | 2 +- src/video_core/macro/macro_jit_x64.h | 5 +++-- 6 files changed, 9 insertions(+), 8 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index d05a88479..89077a2d8 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -15,7 +15,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(u32 method, std::vector& parameters) { +void MacroEngine::Execute(u32 method, const std::vector& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { compiled_macro->second->Execute(parameters, method); diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 49fc4d6bc..b76ed891f 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -102,7 +102,7 @@ public: * @param code The macro byte code to execute * @param parameters The parameters of the macro */ - virtual void Execute(std::vector& parameters, u32 method) = 0; + virtual void Execute(const std::vector& parameters, u32 method) = 0; }; class MacroEngine { @@ -113,7 +113,7 @@ public: void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, std::vector& parameters); + void Execute(u32 method, const std::vector& parameters); protected: virtual std::unique_ptr Compile(const std::vector& code) = 0; diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index e63296a21..5edff27aa 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -21,7 +21,7 @@ MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector& code) : maxwell3d(maxwell3d), code(code) {} -void MacroInterpreterImpl::Execute(std::vector& parameters, u32 method) { +void MacroInterpreterImpl::Execute(const std::vector& parameters, u32 method) { MICROPROFILE_SCOPE(MacroInterp); Reset(); diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index fb923f7b9..90217fc89 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -29,7 +29,7 @@ private: class MacroInterpreterImpl : public CachedMacro { public: MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector& code); - void Execute(std::vector& parameters, u32 method) override; + void Execute(const std::vector& parameters, u32 method) override; private: /// Resets the execution engine state, zeroing registers, etc. diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 48501e582..11c1cc3be 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -47,7 +47,7 @@ MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vecto MacroJITx64Impl::~MacroJITx64Impl() = default; -void MacroJITx64Impl::Execute(std::vector& parameters, u32 method) { +void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { MICROPROFILE_SCOPE(MacroJitExecute); ASSERT_OR_EXECUTE(program != nullptr, { return; }); JITState state{}; diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 729ed7713..6152cb501 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -13,6 +13,7 @@ #include "video_core/macro/macro.h" namespace Tegra { + namespace Engines { class Maxwell3D; } @@ -36,7 +37,7 @@ public: MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code); ~MacroJITx64Impl(); - void Execute(std::vector& parameters, u32 method) override; + void Execute(const std::vector& parameters, u32 method) override; void Compile_ALU(Macro::Opcode opcode); void Compile_AddImmediate(Macro::Opcode opcode); @@ -66,7 +67,7 @@ private: struct JITState { Engines::Maxwell3D* maxwell3d{}; std::array registers{}; - u32* parameters{}; + const u32* parameters{}; u32 carry_flag{}; }; static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); -- cgit v1.2.3 From eca3d16e548451f403cc4cc2e9fb38ed96693102 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Thu, 4 Jun 2020 22:23:07 +1000 Subject: Default init labels and use initializer list for macro engine --- src/video_core/macro/macro_jit_x64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 6152cb501..21ee157cf 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -85,7 +85,7 @@ private: std::optional next_opcode{}; ProgramType program{nullptr}; - std::array labels; + std::array labels{}; std::array delay_skip{}; Xbyak::Label end_of_code{}; -- cgit v1.2.3 From 6e5d8aac4d8e573dc12a9bc8d93621f17323df5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Jun 2020 05:16:54 -0300 Subject: video_core/macro_jit_x64: Remove initializer in member variable Fix build time issues on gcc. Confirmed through asan that avoiding this initialization is safe. --- src/video_core/macro/macro_jit_x64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 21ee157cf..71f738b9a 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -85,8 +85,8 @@ private: std::optional next_opcode{}; ProgramType program{nullptr}; - std::array labels{}; - std::array delay_skip{}; + std::array labels; + std::array delay_skip; Xbyak::Label end_of_code{}; bool is_delay_slot{}; -- cgit v1.2.3 From d563017dfe63aaa26e7c08369995838f8b9fdafb Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 18:59:54 +0100 Subject: xbyak_abi: Remove *GPS variants of stack manipulation functions --- src/video_core/macro/macro_jit_x64.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..2d82c8cff 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -302,22 +302,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, RESULT); Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(RESULT, Common::X64::ABI_RETURN.cvt32()); Compile_ProcessResult(opcode.result_operation, opcode.dst); } void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); mov(Common::X64::ABI_PARAM3, value); Common::X64::CallFarFunction(*this, &Send); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Xbyak::Label dont_process{}; // Get increment @@ -421,7 +421,7 @@ void MacroJITx64Impl::Compile() { bool keep_executing = true; labels.fill(Xbyak::Label()); - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + @@ -463,7 +463,7 @@ void MacroJITx64Impl::Compile() { L(end_of_code); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); ret(); ready(); program = getCode(); -- cgit v1.2.3 From a6a43a5ae047404ca0b03aa647ed5b17400ca7b6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:28:30 +0100 Subject: macro_jit_x64: Remove RESULT_64 This Reg64 codepath has the exact same behaviour as the Reg32 one. --- src/video_core/macro/macro_jit_x64.cpp | 18 +++--------------- src/video_core/macro/macro_jit_x64.h | 1 - 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..9a9d50866 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -19,7 +19,6 @@ static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; -static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -64,15 +63,15 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; - Xbyak::Reg64 src_a; + Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; if (!optimizer.zero_reg_skip) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); src_b = Compile_GetRegister(opcode.src_b, ebx); } else { if (!is_a_zero) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { src_b = Compile_GetRegister(opcode.src_b, ebx); @@ -553,17 +552,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { return dst; } -Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { - if (index == 0) { - // Register 0 is always zero - xor_(dst, dst); - } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); - } - - return dst; -} - void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { Xbyak::Label zero{}, end{}; xor_(ecx, ecx); diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 21ee157cf..377368086 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -55,7 +55,6 @@ private: Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); void Compile_WriteCarry(Xbyak::Reg64 dst); void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); -- cgit v1.2.3 From 389549b80d7cd7054ec622f4038ff599386e1c04 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:51:33 +0100 Subject: macro_jit_x64: Remove METHOD_ADDRESS_64 Unnecessary variable. --- src/video_core/macro/macro_jit_x64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 9a9d50866..1dcf9957c 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -20,7 +20,6 @@ static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ @@ -328,7 +327,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { and_(METHOD_ADDRESS, 0xfff); shr(ecx, 12); and_(ecx, 0x3f); - lea(eax, ptr[rcx + METHOD_ADDRESS_64]); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); sal(ecx, 12); or_(eax, ecx); -- cgit v1.2.3 From 35db6e1c68f18f401bcae8bd8e8937648c7c67c6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:55:02 +0100 Subject: macro_jit_x64: Remove JITState::parameters This can be passed in as an argument instead. --- src/video_core/macro/macro_jit_x64.cpp | 6 ++---- src/video_core/macro/macro_jit_x64.h | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1dcf9957c..f1d123f51 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -51,8 +51,7 @@ void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { JITState state{}; state.maxwell3d = &maxwell3d; state.registers = {}; - state.parameters = parameters.data(); - program(&state); + program(&state, parameters.data()); } void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { @@ -422,8 +421,7 @@ void MacroJITx64Impl::Compile() { Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); - mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + - static_cast(offsetof(JITState, parameters))]); + mov(PARAMETERS, Common::X64::ABI_PARAM2); mov(REGISTERS, Common::X64::ABI_PARAM1); add(REGISTERS, static_cast(offsetof(JITState, registers))); xor_(RESULT, RESULT); diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 377368086..9167b2a93 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -66,11 +66,10 @@ private: struct JITState { Engines::Maxwell3D* maxwell3d{}; std::array registers{}; - const u32* parameters{}; u32 carry_flag{}; }; static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*); + using ProgramType = void (*)(JITState*, const u32*); struct OptimizerState { bool can_skip_carry{}; -- cgit v1.2.3 From 79aa7b3aceeecadfb5b15bc25431db7768434f23 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:00:59 +0100 Subject: macro_jit_x64: Remove REGISTERS Unnecessary since this is just an offset from STATE. --- src/video_core/macro/macro_jit_x64.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index f1d123f51..da3b86d3d 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -15,7 +15,6 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255 namespace Tegra { static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; @@ -24,7 +23,6 @@ static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ PARAMETERS, - REGISTERS, STATE, NEXT_PARAMETER, RESULT, @@ -422,14 +420,12 @@ void MacroJITx64Impl::Compile() { // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, Common::X64::ABI_PARAM2); - mov(REGISTERS, Common::X64::ABI_PARAM1); - add(REGISTERS, static_cast(offsetof(JITState, registers))); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); - mov(dword[REGISTERS + 4], Compile_FetchParameter()); + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); // Track get register for zero registers and mark it as no-op optimizer.zero_reg_skip = true; @@ -543,7 +539,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { // Register 0 is always zero xor_(dst, dst); } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); + mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); } return dst; @@ -564,7 +560,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 if (reg == 0) { return; } - mov(dword[REGISTERS + reg * sizeof(u32)], result); + mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; -- cgit v1.2.3 From c09a9e5cc7f53280218cdfbfd7d7ff056f1c2ff5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:12:53 +0100 Subject: macro_jit_x64: Select better registers All registers are now callee-save registers. RBX and RBP selected for STATE and RESULT because these are most commonly accessed; this is to avoid the REX prefix. RBP not used for STATE because there are some SIB restrictions, RBX emits smaller code. --- src/video_core/macro/macro_jit_x64.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index da3b86d3d..1e7b05ac9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,18 +14,18 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 STATE = Xbyak::util::r11; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; -static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; +static const Xbyak::Reg64 STATE = Xbyak::util::rbx; +static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; +static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ - PARAMETERS, STATE, - NEXT_PARAMETER, RESULT, + PARAMETERS, + NEXT_PARAMETER, METHOD_ADDRESS, BRANCH_HOLDER, }); @@ -64,13 +64,13 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { if (!optimizer.zero_reg_skip) { src_a = Compile_GetRegister(opcode.src_a, RESULT); - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } else { if (!is_a_zero) { src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } } Xbyak::Label skip_carry{}; -- cgit v1.2.3 From 1799f4e7743557c8e41c15201c42431f8d6d6dde Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:14:10 +0100 Subject: macro_jit_x64: Remove unused function Compile_WriteCarry --- src/video_core/macro/macro_jit_x64.cpp | 8 -------- src/video_core/macro/macro_jit_x64.h | 1 - 2 files changed, 9 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1e7b05ac9..b703daad9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -545,14 +545,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { return dst; } -void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { - Xbyak::Label zero{}, end{}; - xor_(ecx, ecx); - shr(dst, 32); - setne(cl); - mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); -} - void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 9167b2a93..a05d8df15 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -55,7 +55,6 @@ private: Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - void Compile_WriteCarry(Xbyak::Reg64 dst); void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); void Compile_Send(Xbyak::Reg32 value); -- cgit v1.2.3 From cf0aad7d6a22024362c7adf04b605108141453f6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:16:47 +0100 Subject: macro_jit_x64: Remove NEXT_PARAMETER Not required, as PARAMETERS can just be incremented directly. --- src/video_core/macro/macro_jit_x64.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index b703daad9..2eb98173d 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -17,7 +17,6 @@ namespace Tegra { static const Xbyak::Reg64 STATE = Xbyak::util::rbx; static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -25,7 +24,6 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, - NEXT_PARAMETER, METHOD_ADDRESS, BRANCH_HOLDER, }); @@ -422,7 +420,6 @@ void MacroJITx64Impl::Compile() { mov(PARAMETERS, Common::X64::ABI_PARAM2); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); - xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); @@ -529,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { } Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { - mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); - inc(NEXT_PARAMETER); + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); return eax; } -- cgit v1.2.3 From a6ddd7c382e0362d5e86c1622c85c78c59c5aa3b Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 22:01:00 +0100 Subject: macro_jit_x64: Should not skip zero registers for certain ALU ops The code generated for these ALU ops assume src_a and src_b are always valid. --- src/video_core/macro/macro_jit_x64.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 2eb98173d..08279b9bc 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -56,11 +56,13 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool valid_operation = !is_a_zero && !is_b_zero; const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; - if (!optimizer.zero_reg_skip) { + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { src_a = Compile_GetRegister(opcode.src_a, RESULT); src_b = Compile_GetRegister(opcode.src_b, eax); } else { -- cgit v1.2.3 From c409722435bdb1f2eae4d192c89278e3b07fd2ed Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 22:01:25 +0100 Subject: macro_jit_x64: Optimization implicitly assumes same destination --- src/video_core/macro/macro_jit_x64.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 08279b9bc..30a7e1fe9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -185,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { if (next_opcode.has_value()) { const auto next = *next_opcode; - if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { return; } } -- cgit v1.2.3 From 44f10d9b9f4ac6fb718718a85a5916721e7944e4 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:03:32 +0100 Subject: macro_jit_x64: Inline Engines::Maxwell3D::GetRegisterValue --- src/video_core/macro/macro_jit_x64.cpp | 23 +++++++++++++++++------ src/video_core/macro/macro_jit_x64.h | 1 + 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index d4a97ec7b..0b2918388 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -295,12 +295,20 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, qword[STATE]); - mov(Common::X64::ABI_PARAM2, RESULT); - Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(RESULT, Common::X64::ABI_RETURN.cvt32()); + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } @@ -435,6 +443,9 @@ void MacroJITx64Impl::Compile() { // one if our register isn't "dirty" optimizer.optimize_for_method_move = true; + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + // Check to see if we can skip emitting certain instructions Optimizer_ScanFlags(); diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 51ec090b8..a180e7428 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -76,6 +76,7 @@ private: bool zero_reg_skip{}; bool skip_dummy_addimmediate{}; bool optimize_for_method_move{}; + bool enable_asserts{}; }; OptimizerState optimizer{}; -- cgit v1.2.3 From 977ceb405627adfec8be6240521d1db8842b8fc2 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 19 Jun 2020 11:39:41 +0100 Subject: macro_jit_x64: Remove unused function Read --- src/video_core/macro/macro_jit_x64.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 0b2918388..80a00ba77 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -270,14 +270,6 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { - return maxwell3d->GetRegisterValue(method); -} - -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); -} - void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { if (optimizer.zero_reg_skip && opcode.src_a == 0) { if (opcode.immediate == 0) { @@ -312,6 +304,10 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); -- cgit v1.2.3 From 811bff009eca0d0fa2ddb1455fc73fdaec4474da Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 21:57:41 -0400 Subject: macro_jit_x64: Eliminate variable shadowing in Compile_ProcessResult() We can reduce the capture scope so that it's not possible for both "reg" variables to clash with one another. While we're at it, we can prevent unnecessary copies while we're at it. --- src/video_core/macro/macro_jit_x64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index bee34a7c0..9eface47e 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -546,7 +546,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { } void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { + const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // register. if (reg == 0) { @@ -554,7 +554,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 } mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; - auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: -- cgit v1.2.3 From 8ea749c1ca50b3584df8c8d7019933fe80e31d9f Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 22:10:43 -0400 Subject: macro_jit_x64: Remove unused variable Removes a completely unused label and marks another variable as unused, given it seems like it has potential uses in the future. --- src/video_core/macro/macro_jit_x64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index bee34a7c0..3515ea43b 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -54,7 +54,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_a_zero = opcode.src_a == 0; const bool is_b_zero = opcode.src_b == 0; const bool valid_operation = !is_a_zero && !is_b_zero; - const bool is_move_operation = !is_a_zero && is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; @@ -73,7 +73,6 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { src_b = Compile_GetRegister(opcode.src_b, eax); } } - Xbyak::Label skip_carry{}; bool has_emitted = false; -- cgit v1.2.3 From 140f953b6a70fa2eaf3f2711993913f6f0ca7a75 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 22:33:01 -0400 Subject: macro_jit_x64: Correct readability of Compile_ExtractShiftLeftRegister() Previously dst wasn't being used. --- src/video_core/macro/macro_jit_x64.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index bee34a7c0..1ecf1d27f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -259,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); if (opcode.bf_src_bit != 0) { shr(src, opcode.bf_src_bit); @@ -269,7 +269,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { if (opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } - shl(src, al); + shl(src, dst.cvt8()); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } -- cgit v1.2.3 From 5a4e89b9018eec802ec445b5c7df7d270d35b4c1 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 22:56:34 -0400 Subject: macro_jit_x64: Correct readability of Compile_ExtractShiftLeftImmediate() Previously dst wasn't being used. --- src/video_core/macro/macro_jit_x64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1ecf1d27f..202fbbc21 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -240,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); - shr(src, al); + shr(src, dst.cvt8()); if (opcode.bf_size != 0 && opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } else if (opcode.bf_size == 0) { -- cgit v1.2.3 From c12eb814b41b5b354df2548d5d48b9ae529ad4b8 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 20 Jun 2020 22:23:58 +0100 Subject: macro_jit_x64: Use ecx for shift register shl/shr only accept cl as their second argument --- src/video_core/macro/macro_jit_x64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 4eef342ec..389b58989 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -239,7 +239,7 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto src = Compile_GetRegister(opcode.src_b, RESULT); shr(src, dst.cvt8()); @@ -258,7 +258,7 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto src = Compile_GetRegister(opcode.src_b, RESULT); if (opcode.bf_src_bit != 0) { -- cgit v1.2.3 From 6ce5f3120be6a65a798d3abc6fda0fe6171d0296 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 5 Jun 2020 01:42:19 +1000 Subject: Macro HLE support --- src/video_core/macro/macro.cpp | 35 ++++++++-- src/video_core/macro/macro.h | 19 ++++- src/video_core/macro/macro_hle.cpp | 108 +++++++++++++++++++++++++++++ src/video_core/macro/macro_hle.h | 43 ++++++++++++ src/video_core/macro/macro_interpreter.cpp | 3 +- src/video_core/macro/macro_jit_x64.cpp | 3 +- 6 files changed, 202 insertions(+), 9 deletions(-) create mode 100644 src/video_core/macro/macro_hle.cpp create mode 100644 src/video_core/macro/macro_hle.h (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 89077a2d8..c8aa2534a 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,23 +2,37 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include "common/assert.h" #include "common/logging/log.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" +#include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" #include "video_core/macro/macro_jit_x64.h" namespace Tegra { +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) + : hle_macros{std::make_unique(maxwell3d)} {} + +MacroEngine::~MacroEngine() {} + void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(u32 method, const std::vector& parameters) { +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, + const std::vector& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { - compiled_macro->second->Execute(parameters, method); + const auto& cache_info = compiled_macro->second; + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } else { // Macro not compiled, check if it's uploaded and if so, compile it auto macro_code = uploaded_macro_code.find(method); @@ -26,8 +40,21 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); return; } - macro_cache[method] = Compile(macro_code->second); - macro_cache[method]->Execute(parameters, method); + auto& cache_info = macro_cache[method]; + cache_info.hash = boost::hash_value(macro_code->second); + cache_info.lle_program = Compile(macro_code->second); + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (hle_program.has_value()) { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program.value()); + } + + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index b76ed891f..5fa8023af 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -11,9 +11,11 @@ #include "common/common_types.h" namespace Tegra { + namespace Engines { class Maxwell3D; } + namespace Macro { constexpr std::size_t NUM_MACRO_REGISTERS = 8; enum class Operation : u32 { @@ -94,6 +96,8 @@ union MethodAddress { } // namespace Macro +class HLEMacro; + class CachedMacro { public: virtual ~CachedMacro() = default; @@ -107,20 +111,29 @@ public: class MacroEngine { public: - virtual ~MacroEngine() = default; + MacroEngine(Engines::Maxwell3D& maxwell3d); + virtual ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, const std::vector& parameters); + void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector& parameters); protected: virtual std::unique_ptr Compile(const std::vector& code) = 0; private: - std::unordered_map> macro_cache; + struct CacheInfo { + std::unique_ptr lle_program{}; + std::unique_ptr hle_program{}; + u64 hash{}; + bool has_hle_program{}; + }; + + std::unordered_map macro_cache; std::unordered_map> uploaded_macro_code; + std::unique_ptr hle_macros; }; std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..51827c822 --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp @@ -0,0 +1,108 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra { + +// HLE'd functions +static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, + const std::vector& parameters) { + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + + maxwell3d.regs.draw.topology.Assign( + static_cast(parameters[0] & + ~(0x3ffffff << 26))); + maxwell3d.regs.vb_base_instance = parameters[5]; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.regs.vb_element_base = parameters[3]; + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.index_array.first = parameters[4]; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.index_array.count = 0; + maxwell3d.mme_draw.instance_count = 0; +} + +static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, + const std::vector& parameters) { + const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + maxwell3d.regs.vertex_buffer.first = parameters[3]; + maxwell3d.regs.vertex_buffer.count = parameters[1]; + maxwell3d.regs.vb_base_instance = parameters[4]; + maxwell3d.regs.draw.topology.Assign( + static_cast(parameters[0])); + maxwell3d.mme_draw.instance_count = count; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(false, true); + } + maxwell3d.regs.vertex_buffer.count = 0; + maxwell3d.mme_draw.instance_count = 0; +} + +static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, + const std::vector& parameters) { + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.index_array.first = parameters[3]; + maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.vb_element_base = element_base; + maxwell3d.regs.vb_base_instance = base_instance; + maxwell3d.regs.const_buffer.cb_pos = 0x640; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.regs.const_buffer.cb_data[0] = element_base; + maxwell3d.regs.const_buffer.cb_data[1] = base_instance; + maxwell3d.regs.draw.topology.Assign( + static_cast(parameters[0])); + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? + maxwell3d.regs.index_array.count = 0; + maxwell3d.regs.vb_element_base = 0x0; + maxwell3d.regs.vb_base_instance = 0x0; + maxwell3d.regs.const_buffer.cb_pos = 0x640; + maxwell3d.regs.const_buffer.cb_data[0] = 0; + maxwell3d.regs.const_buffer.cb_data[1] = 0; + maxwell3d.mme_draw.instance_count = 0; +} + +static const std::unordered_map hle_funcs{ + {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, + {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, + {0x0217920100488FF7, &HLE_0217920100488FF7}, +}; + +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::~HLEMacro() = default; + +std::optional> HLEMacro::GetHLEProgram(u64 hash) const { + auto it = hle_funcs.find(hash); + if (it != hle_funcs.end()) { + return std::make_unique(maxwell3d, it->second); + } else { + return {}; + } +} + +HLEMacroImpl::~HLEMacroImpl() = default; + +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) + : maxwell3d(maxwell3d), func(func) {} + +void HLEMacroImpl::Execute(const std::vector& parameters, u32 method) { + func(maxwell3d, parameters); +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..de7f43dc4 --- /dev/null +++ b/src/video_core/macro/macro_hle.h @@ -0,0 +1,43 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector& parameters); + +class HLEMacro { +public: + HLEMacro(Engines::Maxwell3D& maxwell3d); + ~HLEMacro(); + std::optional> GetHLEProgram(u64 hash) const; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); + ~HLEMacroImpl(); + + void Execute(const std::vector& parameters, u32 method) override; + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 5edff27aa..aa5256419 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,7 +11,8 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr MacroInterpreter::Compile(const std::vector& code) { return std::make_unique(maxwell3d, code); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 30abb66e5..07292702f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr MacroJITx64::Compile(const std::vector& code) { return std::make_unique(maxwell3d, code); -- cgit v1.2.3 From 74b4334d510b58d96e8305bc3f5a7c8d05e842ba Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 5 Jun 2020 12:59:59 +1000 Subject: Fix constbuffer for 0217920100488FF7 --- src/video_core/macro/macro_hle.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 51827c822..887f40310 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -59,10 +59,10 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.regs.index_array.count = parameters[1]; maxwell3d.regs.vb_element_base = element_base; maxwell3d.regs.vb_base_instance = base_instance; - maxwell3d.regs.const_buffer.cb_pos = 0x640; maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.regs.const_buffer.cb_data[0] = element_base; - maxwell3d.regs.const_buffer.cb_data[1] = base_instance; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, element_base); + maxwell3d.CallMethodFromMME(0x8e5, base_instance); maxwell3d.regs.draw.topology.Assign( static_cast(parameters[0])); if (maxwell3d.ShouldExecute()) { @@ -72,10 +72,10 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.regs.index_array.count = 0; maxwell3d.regs.vb_element_base = 0x0; maxwell3d.regs.vb_base_instance = 0x0; - maxwell3d.regs.const_buffer.cb_pos = 0x640; - maxwell3d.regs.const_buffer.cb_data[0] = 0; - maxwell3d.regs.const_buffer.cb_data[1] = 0; maxwell3d.mme_draw.instance_count = 0; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, 0x0); + maxwell3d.CallMethodFromMME(0x8e5, 0x0); } static const std::unordered_map hle_funcs{ -- cgit v1.2.3 From fabdf5d3850c078d173653f259845c26a2ce6e7d Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 5 Jun 2020 13:09:52 +1000 Subject: Addressed issues --- src/video_core/macro/macro.cpp | 2 +- src/video_core/macro/macro.h | 2 +- src/video_core/macro/macro_hle.cpp | 20 ++++++++++---------- src/video_core/macro/macro_hle.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index c8aa2534a..ef7dad349 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -17,7 +17,7 @@ namespace Tegra { MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) : hle_macros{std::make_unique(maxwell3d)} {} -MacroEngine::~MacroEngine() {} +MacroEngine::~MacroEngine() = default; void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 5fa8023af..4d00b84b0 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -111,7 +111,7 @@ public: class MacroEngine { public: - MacroEngine(Engines::Maxwell3D& maxwell3d); + explicit MacroEngine(Engines::Maxwell3D& maxwell3d); virtual ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 887f40310..1f1348df3 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro_hle.h" @@ -78,22 +78,22 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.CallMethodFromMME(0x8e5, 0x0); } -static const std::unordered_map hle_funcs{ - {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, - {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, - {0x0217920100488FF7, &HLE_0217920100488FF7}, +static const std::array, 3> hle_funcs{ + std::make_pair(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), + std::make_pair(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), + std::make_pair(0x0217920100488FF7, &HLE_0217920100488FF7), }; HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} HLEMacro::~HLEMacro() = default; std::optional> HLEMacro::GetHLEProgram(u64 hash) const { - auto it = hle_funcs.find(hash); - if (it != hle_funcs.end()) { - return std::make_unique(maxwell3d, it->second); - } else { - return {}; + const auto it = std::find_if(hle_funcs.begin(), hle_funcs.end(), + [hash](auto& pair) { return pair.first == hash; }); + if (it == hle_funcs.end()) { + return std::nullopt; } + return std::make_unique(maxwell3d, it->second); } HLEMacroImpl::~HLEMacroImpl() = default; diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index de7f43dc4..7cd492a8f 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -20,7 +20,7 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector> GetHLEProgram(u64 hash) const; -- cgit v1.2.3 From 52340e94ac5a64572643f01a23316ad492a40f66 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 5 Jun 2020 14:00:00 +1000 Subject: clear mme draw mode We already draw, so we can clear it --- src/video_core/macro/macro_hle.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 1f1348df3..689533f6a 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -29,6 +29,7 @@ static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, } maxwell3d.regs.index_array.count = 0; maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, @@ -47,6 +48,7 @@ static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, } maxwell3d.regs.vertex_buffer.count = 0; maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, @@ -76,6 +78,7 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.CallMethodFromMME(0x8e3, 0x640); maxwell3d.CallMethodFromMME(0x8e4, 0x0); maxwell3d.CallMethodFromMME(0x8e5, 0x0); + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } static const std::array, 3> hle_funcs{ -- cgit v1.2.3 From f5e2aec4220ee2b72ec2986e0e60625897b2fd44 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Wed, 24 Jun 2020 12:18:33 +1000 Subject: addressed issues --- src/video_core/macro/macro_hle.cpp | 10 ++++++---- src/video_core/macro/macro_hle.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/video_core/macro') diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 689533f6a..410f99018 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -10,6 +10,7 @@ namespace Tegra { +namespace { // HLE'd functions static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { @@ -80,19 +81,20 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.CallMethodFromMME(0x8e5, 0x0); maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } +} // namespace -static const std::array, 3> hle_funcs{ +constexpr std::array, 3> hle_funcs{{ std::make_pair(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), std::make_pair(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), std::make_pair(0x0217920100488FF7, &HLE_0217920100488FF7), -}; +}}; HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} HLEMacro::~HLEMacro() = default; std::optional> HLEMacro::GetHLEProgram(u64 hash) const { - const auto it = std::find_if(hle_funcs.begin(), hle_funcs.end(), - [hash](auto& pair) { return pair.first == hash; }); + const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), + [hash](const auto& pair) { return pair.first == hash; }); if (it == hle_funcs.end()) { return std::nullopt; } diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index 7cd492a8f..37af875a0 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -22,6 +22,7 @@ class HLEMacro { public: explicit HLEMacro(Engines::Maxwell3D& maxwell3d); ~HLEMacro(); + std::optional> GetHLEProgram(u64 hash) const; private: -- cgit v1.2.3