diff options
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 122 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 32 | 
2 files changed, 119 insertions, 35 deletions
| diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index d74b58d84..c798992ec 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -138,6 +138,15 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;  static const u8 NO_DEST_REG_MASK = 0xf;  /** + * Get the vertex shader instruction for a given offset in the current shader program + * @param offset Offset in the current shader program of the instruction + * @return Instruction at the specified offset + */ +static Instruction GetVertexShaderInstruction(size_t offset) { +    return { g_state.vs.program_code[offset] }; +} + +/**   * Loads and swizzles a source register into the specified XMM register.   * @param instr VS instruction, used for determining how to load the source register   * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) @@ -564,10 +573,23 @@ void JitCompiler::Compile_END(Instruction instr) {  }  void JitCompiler::Compile_CALL(Instruction instr) { -    unsigned offset = instr.flow_control.dest_offset; -    while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { -        Compile_NextInstr(&offset); -    } +    // Need to advance the return address past the proceeding instructions, this is the number of bytes to skip +    constexpr unsigned SKIP = 21; +    const uintptr_t start = reinterpret_cast<uintptr_t>(GetCodePtr()); + +    // Push return address - not using CALL because we also want to push the offset of the return before jumping +    MOV(64, R(RAX), ImmPtr(GetCodePtr() + SKIP)); +    PUSH(RAX); + +    // Push offset of the return +    PUSH(32, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); + +    // Jump +    FixupBranch b = J(true); +    fixup_branches.push_back({ b, instr.flow_control.dest_offset }); + +    // Make sure that if the above code changes, SKIP gets updated +    ASSERT(reinterpret_cast<uintptr_t>(GetCodePtr()) - start == SKIP);  }  void JitCompiler::Compile_CALLC(Instruction instr) { @@ -645,8 +667,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {  }  void JitCompiler::Compile_IF(Instruction instr) { -    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", -            *offset_ptr, instr.flow_control.dest_offset.Value()); +    ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements (%d -> %d) not supported", +        last_program_counter, instr.flow_control.dest_offset.Value());      // Evaluate the "IF" condition      if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -677,8 +699,8 @@ void JitCompiler::Compile_IF(Instruction instr) {  }  void JitCompiler::Compile_LOOP(Instruction instr) { -    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", -            *offset_ptr, instr.flow_control.dest_offset.Value()); +    ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards loops (%d -> %d) not supported", +        last_program_counter, instr.flow_control.dest_offset.Value());      ASSERT_MSG(!looping, "Nested loops not supported");      looping = true; @@ -706,9 +728,6 @@ void JitCompiler::Compile_LOOP(Instruction instr) {  }  void JitCompiler::Compile_JMP(Instruction instr) { -    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported", -            *offset_ptr, instr.flow_control.dest_offset.Value()); -      if (instr.opcode.Value() == OpCode::Id::JMPC)          Compile_EvaluateCondition(instr);      else if (instr.opcode.Value() == OpCode::Id::JMPU) @@ -718,31 +737,42 @@ void JitCompiler::Compile_JMP(Instruction instr) {      bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&          (instr.flow_control.num_instructions & 1); -    FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); - -    Compile_Block(instr.flow_control.dest_offset); -    SetJumpTarget(b); +    FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); +    fixup_branches.push_back({ b, instr.flow_control.dest_offset });  }  void JitCompiler::Compile_Block(unsigned end) { -    // Save current offset pointer -    unsigned* prev_offset_ptr = offset_ptr; -    unsigned offset = *prev_offset_ptr; +    while (program_counter < end) { +        Compile_NextInstr(); +    } +} -    while (offset < end) -        Compile_NextInstr(&offset); +void JitCompiler::Compile_Return() { +    // Peek return offset on the stack and check if we're at that offset +    MOV(64, R(RAX), MDisp(RSP, 0)); +    CMP(32, R(RAX), Imm32(program_counter)); -    // Restore current offset pointer -    offset_ptr = prev_offset_ptr; -    *offset_ptr = offset; +    // If so, jump back to before CALL +    FixupBranch b = J_CC(CC_NZ, true); +    ADD(64, R(RSP), Imm32(8)); // Ignore return offset that's on the stack +    POP(RAX); // Pop off return address +    JMPptr(R(RAX)); +    SetJumpTarget(b);  } -void JitCompiler::Compile_NextInstr(unsigned* offset) { -    offset_ptr = offset; +void JitCompiler::Compile_NextInstr() { +    last_program_counter = program_counter; + +    auto search = return_offsets.find(program_counter); +    if (search != return_offsets.end()) { +        Compile_Return(); +    } + +    ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); +    code_ptr[program_counter] = GetCodePtr(); -    Instruction instr; -    std::memcpy(&instr, &g_state.vs.program_code[(*offset_ptr)++], sizeof(Instruction)); +    Instruction instr = GetVertexShaderInstruction(program_counter++);      OpCode::Id opcode = instr.opcode.Value();      auto instr_func = instr_table[static_cast<unsigned>(opcode)]; @@ -757,9 +787,24 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {      }  } +void JitCompiler::FindReturnOffsets() { +    return_offsets.clear(); + +    for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { +        Instruction instr = GetVertexShaderInstruction(offset); + +        switch (instr.opcode.Value()) { +        case OpCode::Id::CALL: +        case OpCode::Id::CALLC: +        case OpCode::Id::CALLU: +            return_offsets.insert(instr.flow_control.dest_offset + instr.flow_control.num_instructions); +            break; +        } +    } +} +  CompiledShader* JitCompiler::Compile() {      const u8* start = GetCodePtr(); -    unsigned offset = g_state.regs.vs.main_offset;      // The stack pointer is 8 modulo 16 at the entry of a procedure      ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); @@ -782,10 +827,27 @@ CompiledShader* JitCompiler::Compile() {      MOV(PTRBITS, R(RAX), ImmPtr(&neg));      MOVAPS(NEGBIT, MatR(RAX)); +    // Find all `CALL` instructions and identify return locations +    FindReturnOffsets(); + +    // Reset flow control state +    last_program_counter = 0; +    program_counter = 0;      looping = false; +    code_ptr.fill(nullptr); +    fixup_branches.clear(); + +    // Jump to start of the shader program +    if (g_state.regs.vs.main_offset != 0) { +        fixup_branches.push_back({ J(true),  g_state.regs.vs.main_offset }); +    } + +    // Compile entire program +    Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); -    while (offset < g_state.vs.program_code.size()) { -        Compile_NextInstr(&offset); +    // Set the target for any incomplete branches now that the entire shader program has been emitted +    for (const auto& branch : fixup_branches) { +        SetJumpTarget(branch.first, code_ptr[branch.second]);      }      return (CompiledShader*)start; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5357c964b..d6f03892d 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -4,6 +4,9 @@  #pragma once +#include <set> +#include <utility> +  #include <nihstro/shader_bytecode.h>  #include "common/x64/emitter.h" @@ -66,8 +69,9 @@ public:      void Compile_MAD(Instruction instr);  private: +      void Compile_Block(unsigned end); -    void Compile_NextInstr(unsigned* offset); +    void Compile_NextInstr();      void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);      void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); @@ -81,13 +85,31 @@ private:      void Compile_EvaluateCondition(Instruction instr);      void Compile_UniformCondition(Instruction instr); +    /** +     * Emits the code to conditionally return from a subroutine envoked by the `CALL` instruction. +     */ +    void Compile_Return(); +      BitSet32 PersistentCallerSavedRegs(); -    /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. -    unsigned* offset_ptr = nullptr; +    /** +     * Analyzes the entire shader program for `CALL` instructions before emitting any code, +     * identifying the locations where a return needs to be inserted. +     */ +    void FindReturnOffsets(); + +    /// Mapping of Pica VS instructions to pointers in the emitted code +    std::array<const u8*, 1024> code_ptr; + +    /// Offsets in code where a return needs to be inserted +    std::set<unsigned> return_offsets; + +    unsigned last_program_counter;  ///< Offset of the most recent instruction decoded +    unsigned program_counter;       ///< Offset of the next instruction to decode +    bool looping = false;           ///< True if compiling a loop, used to check for nested loops -    /// Set to true if currently in a loop, used to check for the existence of nested loops -    bool looping = false; +    /// Branches that need to be fixed up once the entire shader program is compiled +    std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;  };  } // Shader | 
