diff options
| author | Yuri Kunde Schlesner <yuriks@yuriks.net> | 2016-12-02 20:24:24 -0800 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-12-02 20:24:24 -0800 | 
| commit | 018191c1f01980d6a642b0ef1bd0a4cd636c0178 (patch) | |
| tree | f37a96f68ab0a1e9a19bb46649963ef1908ed352 /src/video_core/shader | |
| parent | 0423a38ab55fd2ed7eb9853e9c867d31afd71649 (diff) | |
| parent | e2cb7d78332703ef4e4e119c9ed586334e8e4f30 (diff) | |
Merge pull request #2255 from JayFoxRox/lsl4
shader_jit: Load LOOPCOUNT_REG and LOOPINC 4 bit left-shifted
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 15 | 
1 file changed, 9 insertions, 6 deletions
| diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 211c703ab..9a3d6ca8f 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -102,11 +102,11 @@ static const X64Reg SETUP = R9;  /// The two 32-bit VS address offset registers set by the MOVA instruction  static const X64Reg ADDROFFS_REG_0 = R10;  static const X64Reg ADDROFFS_REG_1 = R11; -/// VS loop count register +/// VS loop count register (Multiplied by 16)  static const X64Reg LOOPCOUNT_REG = R12;  /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)  static const X64Reg LOOPCOUNT = RSI; -/// Number to increment LOOPCOUNT_REG by on each loop iteration +/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)  static const X64Reg LOOPINC = RDI;  /// Result of the previous CMP instruction for the X-component comparison  static const X64Reg COND0 = R13; @@ -718,15 +718,18 @@ void JitShader::Compile_LOOP(Instruction instr) {      looping = true; +    // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. 
+    // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by +    // 4 bits) to be used as an offset into the 16-byte vector registers later      int offset =          ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);      MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));      MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); -    SHR(32, R(LOOPCOUNT_REG), Imm8(8)); -    AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start +    SHR(32, R(LOOPCOUNT_REG), Imm8(4)); +    AND(32, R(LOOPCOUNT_REG), Imm32(0xFF0)); // Y-component is the start      MOV(32, R(LOOPINC), R(LOOPCOUNT)); -    SHR(32, R(LOOPINC), Imm8(16)); -    MOVZX(32, 8, LOOPINC, R(LOOPINC));     // Z-component is the incrementer +    SHR(32, R(LOOPINC), Imm8(12)); +    AND(32, R(LOOPINC), Imm32(0xFF0));     // Z-component is the incrementer      MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count      ADD(32, R(LOOPCOUNT), Imm8(1));        // Iteration count is X-component + 1 | 
