diff options
Diffstat (limited to 'src/common/x64')
| -rw-r--r-- | src/common/x64/abi.cpp | 350 | ||||
| -rw-r--r-- | src/common/x64/abi.h | 58 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 12 | ||||
| -rw-r--r-- | src/common/x64/emitter.cpp | 2583 | ||||
| -rw-r--r-- | src/common/x64/emitter.h | 1206 | 
5 files changed, 7 insertions, 4202 deletions
| diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp deleted file mode 100644 index 504b9c940..000000000 --- a/src/common/x64/abi.cpp +++ /dev/null @@ -1,350 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include "abi.h" -#include "emitter.h" - -using namespace Gen; - -// Shared code between Win64 and Unix64 - -void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, -                                      size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { -    size_t shadow = 0; -#if defined(_WIN32) -    shadow = 0x20; -#endif - -    int count = (mask & ABI_ALL_GPRS).Count(); -    rsp_alignment -= count * 8; -    size_t subtraction = 0; -    int fpr_count = (mask & ABI_ALL_FPRS).Count(); -    if (fpr_count) { -        // If we have any XMMs to save, we must align the stack here. -        subtraction = rsp_alignment & 0xf; -    } -    subtraction += 16 * fpr_count; -    size_t xmm_base_subtraction = subtraction; -    subtraction += needed_frame_size; -    subtraction += shadow; -    // Final alignment. 
-    rsp_alignment -= subtraction; -    subtraction += rsp_alignment & 0xf; - -    *shadowp = shadow; -    *subtractionp = subtraction; -    *xmm_offsetp = subtraction - xmm_base_subtraction; -} - -size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, -                                                 size_t needed_frame_size) { -    size_t shadow, subtraction, xmm_offset; -    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, -                           &xmm_offset); - -    for (int r : mask& ABI_ALL_GPRS) -        PUSH((X64Reg)r); - -    if (subtraction) -        SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); - -    for (int x : mask& ABI_ALL_FPRS) { -        MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16)); -        xmm_offset += 16; -    } - -    return shadow; -} - -void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, -                                              size_t needed_frame_size) { -    size_t shadow, subtraction, xmm_offset; -    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, -                           &xmm_offset); - -    for (int x : mask& ABI_ALL_FPRS) { -        MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset)); -        xmm_offset += 16; -    } - -    if (subtraction) -        ADD(64, R(RSP), subtraction >= 0x80 ? 
Imm32((u32)subtraction) : Imm8((u8)subtraction)); - -    for (int r = 15; r >= 0; r--) { -        if (mask[r]) -            POP((X64Reg)r); -    } -} - -// Common functions -void XEmitter::ABI_CallFunction(const void* func) { -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) { -    MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) { -    MOV(32, R(ABI_PARAM1), Imm32(param1)); -    MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) { -    MOV(32, R(ABI_PARAM1), Imm32(param1)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) { -    MOV(32, R(ABI_PARAM1), Imm32(param1)); -    MOV(32, R(ABI_PARAM2), Imm32(param2)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // 
Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) { -    MOV(32, R(ABI_PARAM1), Imm32(param1)); -    MOV(32, R(ABI_PARAM2), Imm32(param2)); -    MOV(32, R(ABI_PARAM3), Imm32(param3)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) { -    MOV(32, R(ABI_PARAM1), Imm32(param1)); -    MOV(32, R(ABI_PARAM2), Imm32(param2)); -    MOV(64, R(ABI_PARAM3), ImmPtr(param3)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, -                                    void* param4) { -    MOV(32, R(ABI_PARAM1), Imm32(param1)); -    MOV(32, R(ABI_PARAM2), Imm32(param2)); -    MOV(32, R(ABI_PARAM3), Imm32(param3)); -    MOV(64, R(ABI_PARAM4), ImmPtr(param4)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionP(const void* func, void* param1) { -    MOV(64, R(ABI_PARAM1), ImmPtr(param1)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -     
   CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) { -    MOV(64, R(ABI_PARAM1), ImmPtr(param1)); -    if (!arg2.IsSimpleReg(ABI_PARAM2)) -        MOV(32, R(ABI_PARAM2), arg2); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2, -                                   const Gen::OpArg& arg3) { -    MOV(64, R(ABI_PARAM1), ImmPtr(param1)); -    if (!arg2.IsSimpleReg(ABI_PARAM2)) -        MOV(32, R(ABI_PARAM2), arg2); -    if (!arg3.IsSimpleReg(ABI_PARAM3)) -        MOV(32, R(ABI_PARAM3), arg3); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) { -    MOV(64, R(ABI_PARAM1), ImmPtr(param1)); -    MOV(64, R(ABI_PARAM2), ImmPtr(param2)); -    MOV(32, R(ABI_PARAM3), Imm32(param3)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -// Pass a register as a parameter. 
-void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) { -    if (reg1 != ABI_PARAM1) -        MOV(32, R(ABI_PARAM1), R(reg1)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -// Pass two registers as parameters. -void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) { -    if (reg2 != ABI_PARAM1) { -        if (reg1 != ABI_PARAM1) -            MOV(64, R(ABI_PARAM1), R(reg1)); -        if (reg2 != ABI_PARAM2) -            MOV(64, R(ABI_PARAM2), R(reg2)); -    } else { -        if (reg2 != ABI_PARAM2) -            MOV(64, R(ABI_PARAM2), R(reg2)); -        if (reg1 != ABI_PARAM1) -            MOV(64, R(ABI_PARAM1), R(reg1)); -    } -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) { -    if (!arg1.IsSimpleReg(ABI_PARAM1)) -        MOV(32, R(ABI_PARAM1), arg1); -    MOV(32, R(ABI_PARAM2), Imm32(param2)); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2, -                                   u32 param3) { -    if (!arg1.IsSimpleReg(ABI_PARAM1)) -        MOV(32, R(ABI_PARAM1), arg1); -    MOV(32, R(ABI_PARAM2), Imm32(param2)); -    MOV(64, R(ABI_PARAM3), Imm64(param3)); -    u64 distance = u64(func) - (u64(code) + 5); -    if 
(distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) { -    if (!arg1.IsSimpleReg(ABI_PARAM1)) -        MOV(32, R(ABI_PARAM1), arg1); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -} - -void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1, -                                  const Gen::OpArg& arg2) { -    if (!arg1.IsSimpleReg(ABI_PARAM1)) -        MOV(32, R(ABI_PARAM1), arg1); -    if (!arg2.IsSimpleReg(ABI_PARAM2)) -        MOV(32, R(ABI_PARAM2), arg2); -    u64 distance = u64(func) - (u64(code) + 5); -    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { -        // Far call -        MOV(64, R(RAX), ImmPtr(func)); -        CALLptr(R(RAX)); -    } else { -        CALL(func); -    } -}
\ No newline at end of file diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h deleted file mode 100644 index eaaf81d89..000000000 --- a/src/common/x64/abi.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "common/bit_set.h" -#include "emitter.h" - -// x64 ABI:s, and helpers to help follow them when JIT-ing code. -// All convensions return values in EAX (+ possibly EDX). - -// Windows 64-bit -// * 4-reg "fastcall" variant, very new-skool stack handling -// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself -// calls_ -// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space. -// Scratch:      RAX RCX RDX R8 R9 R10 R11 -// Callee-save:  RBX RSI RDI RBP R12 R13 R14 R15 -// Parameters:   RCX RDX R8 R9, further MOV-ed - -// Linux 64-bit -// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed) -// Scratch:      RAX RCX RDX RSI RDI R8 R9 R10 R11 -// Callee-save:  RBX RBP R12 R13 R14 R15 -// Parameters:   RDI RSI RDX RCX R8 R9 - -#define ABI_ALL_FPRS BitSet32(0xffff0000) -#define ABI_ALL_GPRS BitSet32(0x0000ffff) - -#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention - -#define ABI_PARAM1 RCX -#define ABI_PARAM2 RDX -#define ABI_PARAM3 R8 -#define ABI_PARAM4 R9 - -// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers. -#define ABI_ALL_CALLER_SAVED                                                                       \ -    (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16,         \ -              XMM4 + 16, XMM5 + 16}) -#else // 64-bit Unix / OS X - -#define ABI_PARAM1 RDI -#define ABI_PARAM2 RSI -#define ABI_PARAM3 RDX -#define ABI_PARAM4 RCX -#define ABI_PARAM5 R8 -#define ABI_PARAM6 R9 - -// TODO: Avoid pushing all 16 XMM registers when possible. 
Most functions we call probably -// don't actually clobber them. -#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS) -#endif // WIN32 - -#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED) - -#define ABI_RETURN RAX diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 370ae2c80..2cb3ab9cc 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -8,9 +8,9 @@  #include "common/common_types.h"  #include "cpu_detect.h" -namespace Common { - -#ifndef _MSC_VER +#ifdef _MSC_VER +#include <intrin.h> +#else  #if defined(__DragonFly__) || defined(__FreeBSD__)  // clang-format off @@ -37,13 +37,15 @@ static inline void __cpuid(int info[4], int function_id) {  }  #define _XCR_XFEATURE_ENABLED_MASK 0 -static u64 _xgetbv(u32 index) { +static inline u64 _xgetbv(u32 index) {      u32 eax, edx;      __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));      return ((u64)edx << 32) | eax;  } -#endif // ifndef _MSC_VER +#endif // _MSC_VER + +namespace Common {  // Detects the various CPU features  static CPUCaps Detect() { diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp deleted file mode 100644 index f5930abec..000000000 --- a/src/common/x64/emitter.cpp +++ /dev/null @@ -1,2583 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. 
-// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include <cinttypes> -#include <cstring> -#include "abi.h" -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/memory_util.h" -#include "cpu_detect.h" -#include "emitter.h" - -namespace Gen { - -struct NormalOpDef { -    u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; -}; - -// 0xCC is code for invalid combination of immediates -static const NormalOpDef normalops[11] = { -    {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD -    {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC - -    {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB -    {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB - -    {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND -    {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR - -    {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR -    {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV - -    {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from) -    {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP - -    {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG -}; - -enum NormalSSEOps { -    sseCMP = 0xC2, -    sseADD = 0x58,  // ADD -    sseSUB = 0x5C,  // SUB -    sseAND = 0x54,  // AND -    sseANDN = 0x55, // ANDN -    sseOR = 0x56, -    sseXOR = 0x57, -    sseMUL = 0x59,         // MUL -    sseDIV = 0x5E,         // DIV -    sseMIN = 0x5D,         // MIN -    sseMAX = 0x5F,         // MAX -    sseCOMIS = 0x2F,       // COMIS -    sseUCOMIS = 0x2E,      // UCOMIS -    sseSQRT = 0x51,        // SQRT -    sseRSQRT = 0x52,       // RSQRT (NO DOUBLE PRECISION!!!) 
-    sseRCP = 0x53,         // RCP -    sseMOVAPfromRM = 0x28, // MOVAP from RM -    sseMOVAPtoRM = 0x29,   // MOVAP to RM -    sseMOVUPfromRM = 0x10, // MOVUP from RM -    sseMOVUPtoRM = 0x11,   // MOVUP to RM -    sseMOVLPfromRM = 0x12, -    sseMOVLPtoRM = 0x13, -    sseMOVHPfromRM = 0x16, -    sseMOVHPtoRM = 0x17, -    sseMOVHLPS = 0x12, -    sseMOVLHPS = 0x16, -    sseMOVDQfromRM = 0x6F, -    sseMOVDQtoRM = 0x7F, -    sseMASKMOVDQU = 0xF7, -    sseLDDQU = 0xF0, -    sseSHUF = 0xC6, -    sseMOVNTDQ = 0xE7, -    sseMOVNTP = 0x2B, -    sseHADD = 0x7C, -}; - -void XEmitter::SetCodePtr(u8* ptr) { -    code = ptr; -} - -const u8* XEmitter::GetCodePtr() const { -    return code; -} - -u8* XEmitter::GetWritableCodePtr() { -    return code; -} - -void XEmitter::Write8(u8 value) { -    *code++ = value; -} - -void XEmitter::Write16(u16 value) { -    std::memcpy(code, &value, sizeof(u16)); -    code += sizeof(u16); -} - -void XEmitter::Write32(u32 value) { -    std::memcpy(code, &value, sizeof(u32)); -    code += sizeof(u32); -} - -void XEmitter::Write64(u64 value) { -    std::memcpy(code, &value, sizeof(u64)); -    code += sizeof(u64); -} - -void XEmitter::ReserveCodeSpace(int bytes) { -    for (int i = 0; i < bytes; i++) -        *code++ = 0xCC; -} - -const u8* XEmitter::AlignCode4() { -    int c = int((u64)code & 3); -    if (c) -        ReserveCodeSpace(4 - c); -    return code; -} - -const u8* XEmitter::AlignCode16() { -    int c = int((u64)code & 15); -    if (c) -        ReserveCodeSpace(16 - c); -    return code; -} - -const u8* XEmitter::AlignCodePage() { -    int c = int((u64)code & 4095); -    if (c) -        ReserveCodeSpace(4096 - c); -    return code; -} - -// This operation modifies flags; check to see the flags are locked. -// If the flags are locked, we should immediately and loudly fail before -// causing a subtle JIT bug. 
-void XEmitter::CheckFlags() { -    ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!"); -} - -void XEmitter::WriteModRM(int mod, int reg, int rm) { -    Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); -} - -void XEmitter::WriteSIB(int scale, int index, int base) { -    Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); -} - -void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const { -    if (customOp == -1) -        customOp = operandReg; -#ifdef ARCHITECTURE_x86_64 -    u8 op = 0x40; -    // REX.W (whether operation is a 64-bit operation) -    if (opBits == 64) -        op |= 8; -    // REX.R (whether ModR/M reg field refers to R8-R15. -    if (customOp & 8) -        op |= 4; -    // REX.X (whether ModR/M SIB index field refers to R8-R15) -    if (indexReg & 8) -        op |= 2; -    // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) -    if (offsetOrBaseReg & 8) -        op |= 1; -    // Write REX if wr have REX bits to write, or if the operation accesses -    // SIL, DIL, BPL, or SPL. -    if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || -        (opBits == 8 && (customOp & 0x10c) == 4)) { -        emit->Write8(op); -        // Check the operation doesn't access AH, BH, CH, or DH. 
-        DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0); -        DEBUG_ASSERT((customOp & 0x100) == 0); -    } -#else -    DEBUG_ASSERT(opBits != 64); -    DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1); -    DEBUG_ASSERT((indexReg & 8) == 0); -    DEBUG_ASSERT((offsetOrBaseReg & 8) == 0); -    DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); -    DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4); -#endif -} - -void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, -                     int W) const { -    int R = !(regOp1 & 8); -    int X = !(indexReg & 8); -    int B = !(offsetOrBaseReg & 8); - -    int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); - -    // do we need any VEX fields that only appear in the three-byte form? -    if (X == 1 && B == 1 && W == 0 && mmmmm == 1) { -        u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp; -        emit->Write8(0xC5); -        emit->Write8(RvvvvLpp); -    } else { -        u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; -        u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp; -        emit->Write8(0xC4); -        emit->Write8(RXBmmmmm); -        emit->Write8(WvvvvLpp); -    } -} - -void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg, -                      bool warn_64bit_offset) const { -    if (_operandReg == INVALID_REG) -        _operandReg = (X64Reg)this->operandReg; -    int mod = 0; -    int ireg = indexReg; -    bool SIB = false; -    int _offsetOrBaseReg = this->offsetOrBaseReg; - -    if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address -    { -        // Oh, RIP addressing. 
-        _offsetOrBaseReg = 5; -        emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); -// TODO : add some checks -#ifdef ARCHITECTURE_x86_64 -        u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; -        s64 distance = (s64)offset - (s64)ripAddr; -        ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset, -                   "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr, -                   offset); -        s32 offs = (s32)distance; -        emit->Write32((u32)offs); -#else -        emit->Write32((u32)offset); -#endif -        return; -    } - -    if (scale == 0) { -        // Oh, no memory, Just a reg. -        mod = 3; // 11 -    } else if (scale >= 1) { -        // Ah good, no scaling. -        if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) { -            // Okay, we're good. No SIB necessary. -            int ioff = (int)offset; -            if (ioff == 0) { -                mod = 0; -            } else if (ioff < -128 || ioff > 127) { -                mod = 2; // 32-bit displacement -            } else { -                mod = 1; // 8-bit displacement -            } -        } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) { -            SIB = true; -            mod = 0; -            _offsetOrBaseReg = 5; -        } else // if (scale != SCALE_ATREG) -        { -            if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :( -            { -                // So we have to fake it with SIB encoding :( -                SIB = true; -            } - -            if (scale >= SCALE_1 && scale < SCALE_ATREG) { -                SIB = true; -            } - -            if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) { -                SIB = true; -                ireg = _offsetOrBaseReg; -            } - -            // Okay, we're fine. Just disp encoding. -            // We need displacement. 
Which size? -            int ioff = (int)(s64)offset; -            if (ioff < -128 || ioff > 127) { -                mod = 2; // 32-bit displacement -            } else { -                mod = 1; // 8-bit displacement -            } -        } -    } - -    // Okay. Time to do the actual writing -    // ModRM byte: -    int oreg = _offsetOrBaseReg; -    if (SIB) -        oreg = 4; - -    // TODO(ector): WTF is this if about? I don't remember writing it :-) -    // if (RIP) -    //    oreg = 5; - -    emit->WriteModRM(mod, _operandReg & 7, oreg & 7); - -    if (SIB) { -        // SIB byte -        int ss; -        switch (scale) { -        case SCALE_NONE: -            _offsetOrBaseReg = 4; -            ss = 0; -            break; // RSP -        case SCALE_1: -            ss = 0; -            break; -        case SCALE_2: -            ss = 1; -            break; -        case SCALE_4: -            ss = 2; -            break; -        case SCALE_8: -            ss = 3; -            break; -        case SCALE_NOBASE_2: -            ss = 1; -            break; -        case SCALE_NOBASE_4: -            ss = 2; -            break; -        case SCALE_NOBASE_8: -            ss = 3; -            break; -        case SCALE_ATREG: -            ss = 0; -            break; -        default: -            ASSERT_MSG(0, "Invalid scale for SIB byte"); -            ss = 0; -            break; -        } -        emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7))); -    } - -    if (mod == 1) // 8-bit disp -    { -        emit->Write8((u8)(s8)(s32)offset); -    } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp -    { -        emit->Write32((u32)offset); -    } -} - -// W = operand extended width (1 if 64-bit) -// R = register# upper bit -// X = scale amnt upper bit -// B = base register# upper bit -void XEmitter::Rex(int w, int r, int x, int b) { -    w = w ? 1 : 0; -    r = r ? 1 : 0; -    x = x ? 
1 : 0; -    b = b ? 1 : 0; -    u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b)); -    if (rx != 0x40) -        Write8(rx); -} - -void XEmitter::JMP(const u8* addr, bool force5Bytes) { -    u64 fn = (u64)addr; -    if (!force5Bytes) { -        s64 distance = (s64)(fn - ((u64)code + 2)); -        ASSERT_MSG(distance >= -0x80 && distance < 0x80, -                   "Jump target too far away, needs force5Bytes = true"); -        // 8 bits will do -        Write8(0xEB); -        Write8((u8)(s8)distance); -    } else { -        s64 distance = (s64)(fn - ((u64)code + 5)); - -        ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, -                   "Jump target too far away, needs indirect register"); -        Write8(0xE9); -        Write32((u32)(s32)distance); -    } -} - -void XEmitter::JMPptr(const OpArg& arg2) { -    OpArg arg = arg2; -    if (arg.IsImm()) -        ASSERT_MSG(0, "JMPptr - Imm argument"); -    arg.operandReg = 4; -    arg.WriteRex(this, 0, 0); -    Write8(0xFF); -    arg.WriteRest(this); -} - -// Can be used to trap other processors, before overwriting their code -// not used in dolphin -void XEmitter::JMPself() { -    Write8(0xEB); -    Write8(0xFE); -} - -void XEmitter::CALLptr(OpArg arg) { -    if (arg.IsImm()) -        ASSERT_MSG(0, "CALLptr - Imm argument"); -    arg.operandReg = 2; -    arg.WriteRex(this, 0, 0); -    Write8(0xFF); -    arg.WriteRest(this); -} - -void XEmitter::CALL(const void* fnptr) { -    u64 distance = u64(fnptr) - (u64(code) + 5); -    ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL, -               "CALL out of range (%p calls %p)", code, fnptr); -    Write8(0xE8); -    Write32(u32(distance)); -} - -FixupBranch XEmitter::CALL() { -    FixupBranch branch; -    branch.type = 1; -    branch.ptr = code + 5; - -    Write8(0xE8); -    Write32(0); - -    return branch; -} - -FixupBranch XEmitter::J(bool force5bytes) { -    FixupBranch branch; -    branch.type = 
force5bytes ? 1 : 0; -    branch.ptr = code + (force5bytes ? 5 : 2); -    if (!force5bytes) { -        // 8 bits will do -        Write8(0xEB); -        Write8(0); -    } else { -        Write8(0xE9); -        Write32(0); -    } -    return branch; -} - -FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) { -    FixupBranch branch; -    branch.type = force5bytes ? 1 : 0; -    branch.ptr = code + (force5bytes ? 6 : 2); -    if (!force5bytes) { -        // 8 bits will do -        Write8(0x70 + conditionCode); -        Write8(0); -    } else { -        Write8(0x0F); -        Write8(0x80 + conditionCode); -        Write32(0); -    } -    return branch; -} - -void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) { -    u64 fn = (u64)addr; -    s64 distance = (s64)(fn - ((u64)code + 2)); -    if (distance < -0x80 || distance >= 0x80 || force5bytes) { -        distance = (s64)(fn - ((u64)code + 6)); -        ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, -                   "Jump target too far away, needs indirect register"); -        Write8(0x0F); -        Write8(0x80 + conditionCode); -        Write32((u32)(s32)distance); -    } else { -        Write8(0x70 + conditionCode); -        Write8((u8)(s8)distance); -    } -} - -void XEmitter::SetJumpTarget(const FixupBranch& branch) { -    if (branch.type == 0) { -        s64 distance = (s64)(code - branch.ptr); -        ASSERT_MSG(distance >= -0x80 && distance < 0x80, -                   "Jump target too far away, needs force5Bytes = true"); -        branch.ptr[-1] = (u8)(s8)distance; -    } else if (branch.type == 1) { -        s64 distance = (s64)(code - branch.ptr); -        ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, -                   "Jump target too far away, needs indirect register"); -        ((s32*)branch.ptr)[-1] = (s32)distance; -    } -} - -void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { -    if 
(branch.type == 0) { -        s64 distance = (s64)(target - branch.ptr); -        ASSERT_MSG(distance >= -0x80 && distance < 0x80, -                   "Jump target too far away, needs force5Bytes = true"); -        branch.ptr[-1] = (u8)(s8)distance; -    } else if (branch.type == 1) { -        s64 distance = (s64)(target - branch.ptr); -        ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, -                   "Jump target too far away, needs indirect register"); -        ((s32*)branch.ptr)[-1] = (s32)distance; -    } -} - -// Single byte opcodes -// There is no PUSHAD/POPAD in 64-bit mode. -void XEmitter::INT3() { -    Write8(0xCC); -} -void XEmitter::RET() { -    Write8(0xC3); -} -void XEmitter::RET_FAST() { -    Write8(0xF3); -    Write8(0xC3); -} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a -  // ret - -// The first sign of decadence: optimized NOPs. -void XEmitter::NOP(size_t size) { -    DEBUG_ASSERT((int)size > 0); -    while (true) { -        switch (size) { -        case 0: -            return; -        case 1: -            Write8(0x90); -            return; -        case 2: -            Write8(0x66); -            Write8(0x90); -            return; -        case 3: -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x00); -            return; -        case 4: -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x40); -            Write8(0x00); -            return; -        case 5: -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x44); -            Write8(0x00); -            Write8(0x00); -            return; -        case 6: -            Write8(0x66); -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x44); -            Write8(0x00); -            Write8(0x00); -            return; -        case 7: -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x80); -            
Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            return; -        case 8: -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x84); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            return; -        case 9: -            Write8(0x66); -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x84); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            return; -        case 10: -            Write8(0x66); -            Write8(0x66); -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x84); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            return; -        default: -            // Even though x86 instructions are allowed to be up to 15 bytes long, -            // AMD advises against using NOPs longer than 11 bytes because they -            // carry a performance penalty on CPUs older than AMD family 16h. -            Write8(0x66); -            Write8(0x66); -            Write8(0x66); -            Write8(0x0F); -            Write8(0x1F); -            Write8(0x84); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            Write8(0x00); -            size -= 11; -            continue; -        } -    } -} - -void XEmitter::PAUSE() { -    Write8(0xF3); -    NOP(); -} // use in tight spinloops for energy saving on some cpu -void XEmitter::CLC() { -    CheckFlags(); -    Write8(0xF8); -} // clear carry -void XEmitter::CMC() { -    CheckFlags(); -    Write8(0xF5); -} // flip carry -void XEmitter::STC() { -    CheckFlags(); -    Write8(0xF9); -} // set carry - -// TODO: xchg ah, al ??? 
-void XEmitter::XCHG_AHAL() { -    Write8(0x86); -    Write8(0xe0); -    // alt. 86 c4 -} - -// These two can not be executed on early Intel 64-bit CPU:s, only on AMD! -void XEmitter::LAHF() { -    Write8(0x9F); -} -void XEmitter::SAHF() { -    CheckFlags(); -    Write8(0x9E); -} - -void XEmitter::PUSHF() { -    Write8(0x9C); -} -void XEmitter::POPF() { -    CheckFlags(); -    Write8(0x9D); -} - -void XEmitter::LFENCE() { -    Write8(0x0F); -    Write8(0xAE); -    Write8(0xE8); -} -void XEmitter::MFENCE() { -    Write8(0x0F); -    Write8(0xAE); -    Write8(0xF0); -} -void XEmitter::SFENCE() { -    Write8(0x0F); -    Write8(0xAE); -    Write8(0xF8); -} - -void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) { -    if (bits == 16) -        Write8(0x66); -    Rex(bits == 64, 0, 0, (int)reg >> 3); -    Write8(byte + ((int)reg & 7)); -} - -void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) { -    if (bits == 16) -        Write8(0x66); -    Rex(bits == 64, 0, 0, (int)reg >> 3); -    Write8(byte1); -    Write8(byte2 + ((int)reg & 7)); -} - -void XEmitter::CWD(int bits) { -    if (bits == 16) -        Write8(0x66); -    Rex(bits == 64, 0, 0, 0); -    Write8(0x99); -} - -void XEmitter::CBW(int bits) { -    if (bits == 8) -        Write8(0x66); -    Rex(bits == 32, 0, 0, 0); -    Write8(0x98); -} - -// Simple opcodes - -// push/pop do not need wide to be 64-bit -void XEmitter::PUSH(X64Reg reg) { -    WriteSimple1Byte(32, 0x50, reg); -} -void XEmitter::POP(X64Reg reg) { -    WriteSimple1Byte(32, 0x58, reg); -} - -void XEmitter::PUSH(int bits, const OpArg& reg) { -    if (reg.IsSimpleReg()) -        PUSH(reg.GetSimpleReg()); -    else if (reg.IsImm()) { -        switch (reg.GetImmBits()) { -        case 8: -            Write8(0x6A); -            Write8((u8)(s8)reg.offset); -            break; -        case 16: -            Write8(0x66); -            Write8(0x68); -            Write16((u16)(s16)(s32)reg.offset); -            break; -        case 
32: -            Write8(0x68); -            Write32((u32)reg.offset); -            break; -        default: -            ASSERT_MSG(0, "PUSH - Bad imm bits"); -            break; -        } -    } else { -        if (bits == 16) -            Write8(0x66); -        reg.WriteRex(this, bits, bits); -        Write8(0xFF); -        reg.WriteRest(this, 0, (X64Reg)6); -    } -} - -void XEmitter::POP(int /*bits*/, const OpArg& reg) { -    if (reg.IsSimpleReg()) -        POP(reg.GetSimpleReg()); -    else -        ASSERT_MSG(0, "POP - Unsupported encoding"); -} - -void XEmitter::BSWAP(int bits, X64Reg reg) { -    if (bits >= 32) { -        WriteSimple2Byte(bits, 0x0F, 0xC8, reg); -    } else if (bits == 16) { -        ROL(16, R(reg), Imm8(8)); -    } else if (bits == 8) { -        // Do nothing - can't bswap a single byte... -    } else { -        ASSERT_MSG(0, "BSWAP - Wrong number of bits"); -    } -} - -// Undefined opcode - reserved -// If we ever need a way to always cause a non-breakpoint hard exception... 
-void XEmitter::UD2() { -    Write8(0x0F); -    Write8(0x0B); -} - -void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) { -    ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument"); -    arg.operandReg = (u8)level; -    arg.WriteRex(this, 0, 0); -    Write8(0x0F); -    Write8(0x18); -    arg.WriteRest(this); -} - -void XEmitter::SETcc(CCFlags flag, OpArg dest) { -    ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument"); -    dest.operandReg = 0; -    dest.WriteRex(this, 0, 8); -    Write8(0x0F); -    Write8(0x90 + (u8)flag); -    dest.WriteRest(this); -} - -void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) { -    ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument"); -    ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported"); -    if (bits == 16) -        Write8(0x66); -    src.operandReg = dest; -    src.WriteRex(this, bits, bits); -    Write8(0x0F); -    Write8(0x40 + (u8)flag); -    src.WriteRest(this); -} - -void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) { -    ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument"); -    CheckFlags(); -    src.operandReg = ext; -    if (bits == 16) -        Write8(0x66); -    src.WriteRex(this, bits, bits, 0); -    if (bits == 8) { -        Write8(0xF6); -    } else { -        Write8(0xF7); -    } -    src.WriteRest(this); -} - -void XEmitter::MUL(int bits, const OpArg& src) { -    WriteMulDivType(bits, src, 4); -} -void XEmitter::DIV(int bits, const OpArg& src) { -    WriteMulDivType(bits, src, 6); -} -void XEmitter::IMUL(int bits, const OpArg& src) { -    WriteMulDivType(bits, src, 5); -} -void XEmitter::IDIV(int bits, const OpArg& src) { -    WriteMulDivType(bits, src, 7); -} -void XEmitter::NEG(int bits, const OpArg& src) { -    WriteMulDivType(bits, src, 3); -} -void XEmitter::NOT(int bits, const OpArg& src) { -    WriteMulDivType(bits, src, 2); -} - -void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) { -    ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - 
Imm argument"); -    CheckFlags(); -    src.operandReg = (u8)dest; -    if (bits == 16) -        Write8(0x66); -    if (rep) -        Write8(0xF3); -    src.WriteRex(this, bits, bits); -    Write8(0x0F); -    Write8(byte2); -    src.WriteRest(this); -} - -void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) { -    if (bits <= 16) -        ASSERT_MSG(0, "MOVNTI - bits<=16"); -    WriteBitSearchType(bits, src, dest, 0xC3); -} - -void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) { -    WriteBitSearchType(bits, dest, src, 0xBC); -} // Bottom bit to top bit -void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) { -    WriteBitSearchType(bits, dest, src, 0xBD); -} // Top bit to bottom bit - -void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) { -    CheckFlags(); -    if (!Common::GetCPUCaps().bmi1) -        ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); -    WriteBitSearchType(bits, dest, src, 0xBC, true); -} -void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) { -    CheckFlags(); -    if (!Common::GetCPUCaps().lzcnt) -        ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. 
Bad programmer."); -    WriteBitSearchType(bits, dest, src, 0xBD, true); -} - -void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { -    ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument"); -    if (dbits == sbits) { -        MOV(dbits, R(dest), src); -        return; -    } -    src.operandReg = (u8)dest; -    if (dbits == 16) -        Write8(0x66); -    src.WriteRex(this, dbits, sbits); -    if (sbits == 8) { -        Write8(0x0F); -        Write8(0xBE); -    } else if (sbits == 16) { -        Write8(0x0F); -        Write8(0xBF); -    } else if (sbits == 32 && dbits == 64) { -        Write8(0x63); -    } else { -        Crash(); -    } -    src.WriteRest(this); -} - -void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) { -    ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument"); -    if (dbits == sbits) { -        MOV(dbits, R(dest), src); -        return; -    } -    src.operandReg = (u8)dest; -    if (dbits == 16) -        Write8(0x66); -    // the 32bit result is automatically zero extended to 64bit -    src.WriteRex(this, dbits == 64 ? 
32 : dbits, sbits); -    if (sbits == 8) { -        Write8(0x0F); -        Write8(0xB6); -    } else if (sbits == 16) { -        Write8(0x0F); -        Write8(0xB7); -    } else if (sbits == 32 && dbits == 64) { -        Write8(0x8B); -    } else { -        ASSERT_MSG(0, "MOVZX - Invalid size"); -    } -    src.WriteRest(this); -} - -void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) { -    ASSERT_MSG(Common::GetCPUCaps().movbe, -               "Generating MOVBE on a system that does not support it."); -    if (bits == 8) { -        MOV(bits, dest, src); -        return; -    } - -    if (bits == 16) -        Write8(0x66); - -    if (dest.IsSimpleReg()) { -        ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); -        src.WriteRex(this, bits, bits, dest.GetSimpleReg()); -        Write8(0x0F); -        Write8(0x38); -        Write8(0xF0); -        src.WriteRest(this, 0, dest.GetSimpleReg()); -    } else if (src.IsSimpleReg()) { -        ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); -        dest.WriteRex(this, bits, bits, src.GetSimpleReg()); -        Write8(0x0F); -        Write8(0x38); -        Write8(0xF1); -        dest.WriteRest(this, 0, src.GetSimpleReg()); -    } else { -        ASSERT_MSG(0, "MOVBE: Not loading or storing to mem"); -    } -} - -void XEmitter::LEA(int bits, X64Reg dest, OpArg src) { -    ASSERT_MSG(!src.IsImm(), "LEA - Imm argument"); -    src.operandReg = (u8)dest; -    if (bits == 16) -        Write8(0x66); // TODO: performance warning -    src.WriteRex(this, bits, bits); -    Write8(0x8D); -    src.WriteRest(this, 0, INVALID_REG, bits == 64); -} - -// shift can be either imm8 or cl -void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) { -    CheckFlags(); -    bool writeImm = false; -    if (dest.IsImm()) { -        ASSERT_MSG(0, "WriteShift - can't shift imms"); -    } -    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || -      
  (shift.IsImm() && shift.GetImmBits() != 8)) { -        ASSERT_MSG(0, "WriteShift - illegal argument"); -    } -    dest.operandReg = ext; -    if (bits == 16) -        Write8(0x66); -    dest.WriteRex(this, bits, bits, 0); -    if (shift.GetImmBits() == 8) { -        // ok an imm -        u8 imm = (u8)shift.offset; -        if (imm == 1) { -            Write8(bits == 8 ? 0xD0 : 0xD1); -        } else { -            writeImm = true; -            Write8(bits == 8 ? 0xC0 : 0xC1); -        } -    } else { -        Write8(bits == 8 ? 0xD2 : 0xD3); -    } -    dest.WriteRest(this, writeImm ? 1 : 0); -    if (writeImm) -        Write8((u8)shift.offset); -} - -// large rotates and shift are slower on intel than amd -// intel likes to rotate by 1, and the op is smaller too -void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 0); -} -void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 1); -} -void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 2); -} -void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 3); -} -void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 4); -} -void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 5); -} -void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) { -    WriteShift(bits, dest, shift, 7); -} - -// index can be either imm8 or register, don't use memory destination because it's slow -void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) { -    CheckFlags(); -    if (dest.IsImm()) { -        ASSERT_MSG(0, "WriteBitTest - can't test imms"); -    } -    if ((index.IsImm() && index.GetImmBits() != 8)) { -        ASSERT_MSG(0, "WriteBitTest - illegal argument"); -    } -    if (bits == 16) 
-        Write8(0x66); -    if (index.IsImm()) { -        dest.WriteRex(this, bits, bits); -        Write8(0x0F); -        Write8(0xBA); -        dest.WriteRest(this, 1, (X64Reg)ext); -        Write8((u8)index.offset); -    } else { -        X64Reg operand = index.GetSimpleReg(); -        dest.WriteRex(this, bits, bits, operand); -        Write8(0x0F); -        Write8(0x83 + 8 * ext); -        dest.WriteRest(this, 1, operand); -    } -} - -void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) { -    WriteBitTest(bits, dest, index, 4); -} -void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) { -    WriteBitTest(bits, dest, index, 5); -} -void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) { -    WriteBitTest(bits, dest, index, 6); -} -void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) { -    WriteBitTest(bits, dest, index, 7); -} - -// shift can be either imm8 or cl -void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { -    CheckFlags(); -    if (dest.IsImm()) { -        ASSERT_MSG(0, "SHRD - can't use imms as destination"); -    } -    if (!src.IsSimpleReg()) { -        ASSERT_MSG(0, "SHRD - must use simple register as source"); -    } -    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || -        (shift.IsImm() && shift.GetImmBits() != 8)) { -        ASSERT_MSG(0, "SHRD - illegal shift"); -    } -    if (bits == 16) -        Write8(0x66); -    X64Reg operand = src.GetSimpleReg(); -    dest.WriteRex(this, bits, bits, operand); -    if (shift.GetImmBits() == 8) { -        Write8(0x0F); -        Write8(0xAC); -        dest.WriteRest(this, 1, operand); -        Write8((u8)shift.offset); -    } else { -        Write8(0x0F); -        Write8(0xAD); -        dest.WriteRest(this, 0, operand); -    } -} - -void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { -    CheckFlags(); -    if (dest.IsImm()) { -        ASSERT_MSG(0, "SHLD - 
can't use imms as destination"); -    } -    if (!src.IsSimpleReg()) { -        ASSERT_MSG(0, "SHLD - must use simple register as source"); -    } -    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || -        (shift.IsImm() && shift.GetImmBits() != 8)) { -        ASSERT_MSG(0, "SHLD - illegal shift"); -    } -    if (bits == 16) -        Write8(0x66); -    X64Reg operand = src.GetSimpleReg(); -    dest.WriteRex(this, bits, bits, operand); -    if (shift.GetImmBits() == 8) { -        Write8(0x0F); -        Write8(0xA4); -        dest.WriteRest(this, 1, operand); -        Write8((u8)shift.offset); -    } else { -        Write8(0x0F); -        Write8(0xA5); -        dest.WriteRest(this, 0, operand); -    } -} - -void OpArg::WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg _operandReg, int bits) { -    if (bits == 16) -        emit->Write8(0x66); - -    this->operandReg = (u8)_operandReg; -    WriteRex(emit, bits, bits); -    emit->Write8(op); -    WriteRest(emit); -} - -// operand can either be immediate or register -void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, -                          int bits) const { -    X64Reg _operandReg; -    if (IsImm()) { -        ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order"); -    } - -    if (bits == 16) -        emit->Write8(0x66); - -    int immToWrite = 0; - -    if (operand.IsImm()) { -        WriteRex(emit, bits, bits); - -        if (!toRM) { -            ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)"); -        } - -        if (operand.scale == SCALE_IMM8 && bits == 8) { -            // op al, imm8 -            if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) { -                emit->Write8(normalops[op].eaximm8); -                emit->Write8((u8)operand.offset); -                return; -            } -            // mov reg, imm8 -            if (!scale && op == nrmMOV) { -                emit->Write8(0xB0 + (offsetOrBaseReg & 7)); -  
              emit->Write8((u8)operand.offset); -                return; -            } -            // op r/m8, imm8 -            emit->Write8(normalops[op].imm8); -            immToWrite = 8; -        } else if ((operand.scale == SCALE_IMM16 && bits == 16) || -                   (operand.scale == SCALE_IMM32 && bits == 32) || -                   (operand.scale == SCALE_IMM32 && bits == 64)) { -            // Try to save immediate size if we can, but first check to see -            // if the instruction supports simm8. -            // op r/m, imm8 -            if (normalops[op].simm8 != 0xCC && -                ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || -                 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) { -                emit->Write8(normalops[op].simm8); -                immToWrite = 8; -            } else { -                // mov reg, imm -                if (!scale && op == nrmMOV && bits != 64) { -                    emit->Write8(0xB8 + (offsetOrBaseReg & 7)); -                    if (bits == 16) -                        emit->Write16((u16)operand.offset); -                    else -                        emit->Write32((u32)operand.offset); -                    return; -                } -                // op eax, imm -                if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) { -                    emit->Write8(normalops[op].eaximm32); -                    if (bits == 16) -                        emit->Write16((u16)operand.offset); -                    else -                        emit->Write32((u32)operand.offset); -                    return; -                } -                // op r/m, imm -                emit->Write8(normalops[op].imm32); -                immToWrite = bits == 16 ? 
16 : 32; -            } -        } else if ((operand.scale == SCALE_IMM8 && bits == 16) || -                   (operand.scale == SCALE_IMM8 && bits == 32) || -                   (operand.scale == SCALE_IMM8 && bits == 64)) { -            // op r/m, imm8 -            emit->Write8(normalops[op].simm8); -            immToWrite = 8; -        } else if (operand.scale == SCALE_IMM64 && bits == 64) { -            if (scale) { -                ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); -            } -            // mov reg64, imm64 -            else if (op == nrmMOV) { -                emit->Write8(0xB8 + (offsetOrBaseReg & 7)); -                emit->Write64((u64)operand.offset); -                return; -            } -            ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm"); -        } else { -            ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); -        } -        _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM -    } else { -        _operandReg = (X64Reg)operand.offsetOrBaseReg; -        WriteRex(emit, bits, bits, _operandReg); -        // op r/m, reg -        if (toRM) { -            emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); -        } -        // op reg, r/m -        else { -            emit->Write8(bits == 8 ? 
normalops[op].fromRm8 : normalops[op].fromRm32); -        } -    } -    WriteRest(emit, immToWrite >> 3, _operandReg); -    switch (immToWrite) { -    case 0: -        break; -    case 8: -        emit->Write8((u8)operand.offset); -        break; -    case 16: -        emit->Write16((u16)operand.offset); -        break; -    case 32: -        emit->Write32((u32)operand.offset); -        break; -    default: -        ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); -    } -} - -void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, -                             const OpArg& a2) { -    if (a1.IsImm()) { -        // Booh! Can't write to an imm -        ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm"); -        return; -    } -    if (a2.IsImm()) { -        a1.WriteNormalOp(emit, true, op, a2, bits); -    } else { -        if (a1.IsSimpleReg()) { -            a2.WriteNormalOp(emit, false, op, a1, bits); -        } else { -            ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), -                       "WriteNormalOp - a1 and a2 cannot both be memory"); -            a1.WriteNormalOp(emit, true, op, a2, bits); -        } -    } -} - -void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmADD, a1, a2); -} -void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmADC, a1, a2); -} -void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmSUB, a1, a2); -} -void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmSBB, a1, a2); -} -void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmAND, a1, a2); -} -void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmOR, a1, a2); -} -void 
XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmXOR, a1, a2); -} -void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) { -    if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) -        LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); -    WriteNormalOp(this, bits, nrmMOV, a1, a2); -} -void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmTEST, a1, a2); -} -void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    WriteNormalOp(this, bits, nrmCMP, a1, a2); -} -void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) { -    WriteNormalOp(this, bits, nrmXCHG, a1, a2); -} - -void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) { -    CheckFlags(); -    if (bits == 8) { -        ASSERT_MSG(0, "IMUL - illegal bit size!"); -        return; -    } - -    if (a1.IsImm()) { -        ASSERT_MSG(0, "IMUL - second arg cannot be imm!"); -        return; -    } - -    if (!a2.IsImm()) { -        ASSERT_MSG(0, "IMUL - third arg must be imm!"); -        return; -    } - -    if (bits == 16) -        Write8(0x66); -    a1.WriteRex(this, bits, bits, regOp); - -    if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || -        (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) { -        Write8(0x6B); -        a1.WriteRest(this, 1, regOp); -        Write8((u8)a2.offset); -    } else { -        Write8(0x69); -        if (a2.GetImmBits() == 16 && bits == 16) { -            a1.WriteRest(this, 2, regOp); -            Write16((u16)a2.offset); -        } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) { -            a1.WriteRest(this, 4, regOp); -            Write32((u32)a2.offset); -        } else { -            ASSERT_MSG(0, "IMUL - unhandled case!"); -        } -    } -} - -void 
XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) { -    CheckFlags(); -    if (bits == 8) { -        ASSERT_MSG(0, "IMUL - illegal bit size!"); -        return; -    } - -    if (a.IsImm()) { -        IMUL(bits, regOp, R(regOp), a); -        return; -    } - -    if (bits == 16) -        Write8(0x66); -    a.WriteRex(this, bits, bits, regOp); -    Write8(0x0F); -    Write8(0xAF); -    a.WriteRest(this, 0, regOp); -} - -void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) { -    if (opPrefix) -        Write8(opPrefix); -    arg.operandReg = regOp; -    arg.WriteRex(this, 0, 0); -    Write8(0x0F); -    if (op > 0xFF) -        Write8((op >> 8) & 0xFF); -    Write8(op & 0xFF); -    arg.WriteRest(this, extrabytes); -} - -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { -    WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); -} - -static int GetVEXmmmmm(u16 op) { -    // Currently, only 0x38 and 0x3A are used as secondary escape byte. -    if ((op >> 8) == 0x3A) -        return 3; -    if ((op >> 8) == 0x38) -        return 2; - -    return 1; -} - -static int GetVEXpp(u8 opPrefix) { -    if (opPrefix == 0x66) -        return 1; -    if (opPrefix == 0xF3) -        return 2; -    if (opPrefix == 0xF2) -        return 3; - -    return 0; -} - -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, -                          int extrabytes) { -    if (!Common::GetCPUCaps().avx) -        ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. 
Bad programmer."); -    int mmmmm = GetVEXmmmmm(op); -    int pp = GetVEXpp(opPrefix); -    // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size -    // here -    arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); -    Write8(op & 0xFF); -    arg.WriteRest(this, extrabytes, regOp1); -} - -// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 -void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, -                          const OpArg& arg, int extrabytes) { -    if (size != 32 && size != 64) -        ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); -    int mmmmm = GetVEXmmmmm(op); -    int pp = GetVEXpp(opPrefix); -    arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64); -    Write8(op & 0xFF); -    arg.WriteRest(this, extrabytes, regOp1); -} - -void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, -                           const OpArg& arg, int extrabytes) { -    CheckFlags(); -    if (!Common::GetCPUCaps().bmi1) -        ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); -    WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); -} - -void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, -                           const OpArg& arg, int extrabytes) { -    CheckFlags(); -    if (!Common::GetCPUCaps().bmi2) -        ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. 
Bad programmer."); -    WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); -} - -void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x6E, dest, arg, 0); -} -void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) { -    WriteSSEOp(0x66, 0x7E, src, arg, 0); -} - -void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) { -#ifdef ARCHITECTURE_x86_64 -    // Alternate encoding -    // This does not display correctly in MSVC's debugger, it thinks it's a MOVD -    arg.operandReg = dest; -    Write8(0x66); -    arg.WriteRex(this, 64, 0); -    Write8(0x0f); -    Write8(0x6E); -    arg.WriteRest(this, 0); -#else -    arg.operandReg = dest; -    Write8(0xF3); -    Write8(0x0f); -    Write8(0x7E); -    arg.WriteRest(this, 0); -#endif -} - -void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { -    if (src > 7 || arg.IsSimpleReg()) { -        // Alternate encoding -        // This does not display correctly in MSVC's debugger, it thinks it's a MOVD -        arg.operandReg = src; -        Write8(0x66); -        arg.WriteRex(this, 64, 0); -        Write8(0x0f); -        Write8(0x7E); -        arg.WriteRest(this, 0); -    } else { -        arg.operandReg = src; -        arg.WriteRex(this, 0, 0); -        Write8(0x66); -        Write8(0x0f); -        Write8(0xD6); -        arg.WriteRest(this, 0); -    } -} - -void XEmitter::WriteMXCSR(OpArg arg, int ext) { -    if (arg.IsImm() || arg.IsSimpleReg()) -        ASSERT_MSG(0, "MXCSR - invalid operand"); - -    arg.operandReg = ext; -    arg.WriteRex(this, 0, 0); -    Write8(0x0F); -    Write8(0xAE); -    arg.WriteRest(this); -} - -void XEmitter::STMXCSR(const OpArg& memloc) { -    WriteMXCSR(memloc, 3); -} -void XEmitter::LDMXCSR(const OpArg& memloc) { -    WriteMXCSR(memloc, 2); -} - -void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg); -} -void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x00, sseMOVNTP, regOp, arg); -} -void 
XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVNTP, regOp, arg); -} - -void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseADD, regOp, arg); -} -void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseADD, regOp, arg); -} -void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseSUB, regOp, arg); -} -void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseSUB, regOp, arg); -} -void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) { -    WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); -    Write8(compare); -} -void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) { -    WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); -    Write8(compare); -} -void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseMUL, regOp, arg); -} -void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseMUL, regOp, arg); -} -void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseDIV, regOp, arg); -} -void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseDIV, regOp, arg); -} -void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseMIN, regOp, arg); -} -void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseMIN, regOp, arg); -} -void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseMAX, regOp, arg); -} -void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseMAX, regOp, arg); -} -void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseSQRT, regOp, arg); -} -void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseSQRT, regOp, arg); -} -void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseRCP, regOp, arg); -} -void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 
sseRSQRT, regOp, arg); -} - -void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseADD, regOp, arg); -} -void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseADD, regOp, arg); -} -void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseSUB, regOp, arg); -} -void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseSUB, regOp, arg); -} -void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) { -    WriteSSEOp(0x00, sseCMP, regOp, arg, 1); -    Write8(compare); -} -void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) { -    WriteSSEOp(0x66, sseCMP, regOp, arg, 1); -    Write8(compare); -} -void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseAND, regOp, arg); -} -void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseAND, regOp, arg); -} -void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseANDN, regOp, arg); -} -void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseANDN, regOp, arg); -} -void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseOR, regOp, arg); -} -void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseOR, regOp, arg); -} -void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseXOR, regOp, arg); -} -void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseXOR, regOp, arg); -} -void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseMUL, regOp, arg); -} -void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMUL, regOp, arg); -} -void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseDIV, regOp, arg); -} -void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseDIV, regOp, arg); -} -void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) { -    
WriteSSEOp(0x00, sseMIN, regOp, arg); -} -void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMIN, regOp, arg); -} -void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseMAX, regOp, arg); -} -void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMAX, regOp, arg); -} -void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseSQRT, regOp, arg); -} -void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseSQRT, regOp, arg); -} -void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseRCP, regOp, arg); -} -void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseRSQRT, regOp, arg); -} -void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) { -    WriteSSEOp(0x00, sseSHUF, regOp, arg, 1); -    Write8(shuffle); -} -void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) { -    WriteSSEOp(0x66, sseSHUF, regOp, arg, 1); -    Write8(shuffle); -} - -void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseHADD, regOp, arg); -} - -void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseCOMIS, regOp, arg); -} // weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseCOMIS, regOp, arg); -} // ordered -void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseUCOMIS, regOp, arg); -} // unordered -void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseUCOMIS, regOp, arg); -} - -void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg); -} -void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg); -} -void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg); -} -void XEmitter::MOVAPD(const 
OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg); -} - -void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg); -} -void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg); -} - -void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg); -} -void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg); -} -void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg); -} -void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg); -} - -void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg); -} -void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg); -} - -void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); -} -void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); -} -void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); -} -void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); -} - -void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); -} -void XEmitter::MOVHPD(X64Reg regOp, const 
OpArg& arg) { -    WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); -} -void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); -} -void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { -    WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); -} - -void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) { -    WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2)); -} -void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) { -    WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2)); -} - -void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, 0x5A, regOp, arg); -} -void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, 0x5A, regOp, arg); -} - -void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, 0x5A, regOp, arg); -} -void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 0x5A, regOp, arg); -} -void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, 0x2D, regOp, arg); -} -void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 0x2D, regOp, arg); -} -void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, 0x2A, regOp, arg); -} -void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 0x2A, regOp, arg); -} - -void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 0xE6, regOp, arg); -} -void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x00, 0x5B, regOp, arg); -} -void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, 0xE6, regOp, arg); -} -void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, 0x5B, regOp, arg); -} - -void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF2, 0x2C, regOp, arg); -} -void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 0x2C, regOp, arg); -} -void XEmitter::CVTTPS2DQ(X64Reg 
regOp, const OpArg& arg) { -    WriteSSEOp(0xF3, 0x5B, regOp, arg); -} -void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) { -    WriteSSEOp(0x66, 0xE6, regOp, arg); -} - -void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) { -    WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src)); -} - -void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x00, 0x50, dest, arg); -} -void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x50, dest, arg); -} - -void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0xF2, sseLDDQU, dest, arg); -} // For integer data only - -// THESE TWO ARE UNTESTED. -void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x00, 0x14, dest, arg); -} -void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x00, 0x15, dest, arg); -} - -void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x14, dest, arg); -} -void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x15, dest, arg); -} - -void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) { -    if (Common::GetCPUCaps().sse3) { -        WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup -    } else { -        // Simulate this instruction with SSE2 instructions -        if (!arg.IsSimpleReg(regOp)) -            MOVSD(regOp, arg); -        UNPCKLPD(regOp, R(regOp)); -    } -} - -// There are a few more left - -// Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x6B, dest, arg); -} -void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x63, dest, arg); -} -void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x67, dest, arg); -} - -void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x60, dest, arg); -} -void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x61, dest, arg); -} 
-void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x62, dest, arg); -} -void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x6C, dest, arg); -} - -void XEmitter::PSRLW(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); -    Write8(shift); -} - -void XEmitter::PSRLD(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); -    Write8(shift); -} - -void XEmitter::PSRLQ(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); -    Write8(shift); -} - -void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) { -    WriteSSEOp(0x66, 0xd3, reg, arg); -} - -void XEmitter::PSRLDQ(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg)); -    Write8(shift); -} - -void XEmitter::PSLLW(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); -    Write8(shift); -} - -void XEmitter::PSLLD(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); -    Write8(shift); -} - -void XEmitter::PSLLQ(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); -    Write8(shift); -} - -void XEmitter::PSLLDQ(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg)); -    Write8(shift); -} - -void XEmitter::PSRAW(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg)); -    Write8(shift); -} - -void XEmitter::PSRAD(X64Reg reg, int shift) { -    WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg)); -    Write8(shift); -} - -void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { -    if (!Common::GetCPUCaps().ssse3) -        ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. 
Bad programmer."); -    WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); -} - -void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { -    if (!Common::GetCPUCaps().sse4_1) -        ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); -    WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); -} - -void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) { -    WriteSSSE3Op(0x66, 0x3800, dest, arg); -} -void XEmitter::PTEST(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3817, dest, arg); -} -void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x382b, dest, arg); -} -void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) { -    WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); -    Write8(mask); -} - -void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3838, dest, arg); -} -void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3839, dest, arg); -} -void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x383a, dest, arg); -} -void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x383b, dest, arg); -} -void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x383c, dest, arg); -} -void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x383d, dest, arg); -} -void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x383e, dest, arg); -} -void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x383f, dest, arg); -} - -void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3820, dest, arg); -} -void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3821, dest, arg); -} -void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3822, dest, arg); -} -void XEmitter::PMOVSXWD(X64Reg 
dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3823, dest, arg); -} -void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3824, dest, arg); -} -void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3825, dest, arg); -} -void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3830, dest, arg); -} -void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3831, dest, arg); -} -void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3832, dest, arg); -} -void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3833, dest, arg); -} -void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3834, dest, arg); -} -void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3835, dest, arg); -} - -void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3810, dest, arg); -} -void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3814, dest, arg); -} -void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) { -    WriteSSE41Op(0x66, 0x3815, dest, arg); -} -void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { -    WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); -    Write8(blend); -} -void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { -    WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); -    Write8(blend); -} - -void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) { -    WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); -    Write8(mode); -} -void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) { -    WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); -    Write8(mode); -} -void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) { -    WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); -    Write8(mode); -} -void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) { -    
WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); -    Write8(mode); -} - -void XEmitter::PAND(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xDB, dest, arg); -} -void XEmitter::PANDN(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xDF, dest, arg); -} -void XEmitter::PXOR(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xEF, dest, arg); -} -void XEmitter::POR(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xEB, dest, arg); -} - -void XEmitter::PADDB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xFC, dest, arg); -} -void XEmitter::PADDW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xFD, dest, arg); -} -void XEmitter::PADDD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xFE, dest, arg); -} -void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xD4, dest, arg); -} - -void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xEC, dest, arg); -} -void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xED, dest, arg); -} -void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xDC, dest, arg); -} -void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xDD, dest, arg); -} - -void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xF8, dest, arg); -} -void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xF9, dest, arg); -} -void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xFA, dest, arg); -} -void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xFB, dest, arg); -} - -void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xE8, dest, arg); -} -void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xE9, dest, arg); -} -void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xD8, dest, arg); -} -void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) { 
-    WriteSSEOp(0x66, 0xD9, dest, arg); -} - -void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xE0, dest, arg); -} -void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xE3, dest, arg); -} - -void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x74, dest, arg); -} -void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x75, dest, arg); -} -void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x76, dest, arg); -} - -void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x64, dest, arg); -} -void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x65, dest, arg); -} -void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0x66, dest, arg); -} - -void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) { -    WriteSSEOp(0x66, 0xC5, dest, arg, 1); -    Write8(subreg); -} -void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) { -    WriteSSEOp(0x66, 0xC4, dest, arg, 1); -    Write8(subreg); -} - -void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xF5, dest, arg); -} -void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xF6, dest, arg); -} - -void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xEE, dest, arg); -} -void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xDE, dest, arg); -} -void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xEA, dest, arg); -} -void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xDA, dest, arg); -} - -void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) { -    WriteSSEOp(0x66, 0xD7, dest, arg); -} -void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) { -    WriteSSEOp(0x66, 0x70, regOp, arg, 1); -    Write8(shuffle); -} -void XEmitter::PSHUFLW(X64Reg regOp, const 
OpArg& arg, u8 shuffle) { -    WriteSSEOp(0xF2, 0x70, regOp, arg, 1); -    Write8(shuffle); -} -void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) { -    WriteSSEOp(0xF3, 0x70, regOp, arg, 1); -    Write8(shuffle); -} - -// VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg); -} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg); -} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg); -} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg); -} -void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg); -} -void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg); -} -void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg); -} -void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg); -} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg); -} -void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) { -    WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); -    Write8(shuffle); -} -void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg); -} -void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg); -} - -void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); -} -void XEmitter::VANDPD(X64Reg regOp1, X64Reg 
regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); -} -void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); -} -void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); -} -void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); -} -void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); -} -void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); -} -void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); -} - -void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); -} -void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); -} -void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); -} -void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); -} - -void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); -} -void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); -} -void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); -} -void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A8, 
regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); -} -void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); -} -void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); -} -void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    
WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg 
regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); -} -void 
XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); -} -void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); -} -void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); -} -void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); -} -void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); -} -void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); -} - -void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { -    WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { -    WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { -    WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& 
arg, u8 rotate) { -    WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); -    Write8(rotate); -} -void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg); -} -void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg); -} -void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg); -} -void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { -    WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg); -} -void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) { -    WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg); -} -void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) { -    WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg); -} -void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) { -    WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg); -} -void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { -    WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { -    WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg); -} - -// Prefixes - -void XEmitter::LOCK() { -    Write8(0xF0); -} -void XEmitter::REP() { -    Write8(0xF3); -} -void XEmitter::REPNE() { -    Write8(0xF2); -} -void XEmitter::FSOverride() { -    Write8(0x64); -} -void XEmitter::GSOverride() { -    Write8(0x65); -} - -void XEmitter::FWAIT() { -    Write8(0x9B); -} - -// TODO: make this more generic -void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) { -    int mf = 0; -    ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), -               "WriteFloatLoadStore: 80 bits not supported for this instruction"); -    switch (bits) { -    case 32: - 
       mf = 0; -        break; -    case 64: -        mf = 4; -        break; -    case 80: -        mf = 2; -        break; -    default: -        ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); -    } -    Write8(0xd9 | mf); -    // x87 instructions use the reg field of the ModR/M byte as opcode: -    if (bits == 80) -        op = op_80b; -    arg.WriteRest(this, 0, (X64Reg)op); -} - -void XEmitter::FLD(int bits, const OpArg& src) { -    WriteFloatLoadStore(bits, floatLD, floatLD80, src); -} -void XEmitter::FST(int bits, const OpArg& dest) { -    WriteFloatLoadStore(bits, floatST, floatINVALID, dest); -} -void XEmitter::FSTP(int bits, const OpArg& dest) { -    WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest); -} -void XEmitter::FNSTSW_AX() { -    Write8(0xDF); -    Write8(0xE0); -} - -void XEmitter::RDTSC() { -    Write8(0x0F); -    Write8(0x31); -} - -void XCodeBlock::PoisonMemory() { -    // x86/64: 0xCC = breakpoint -    memset(region, 0xCC, region_size); -} -} diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h deleted file mode 100644 index 7d7cdde16..000000000 --- a/src/common/x64/emitter.h +++ /dev/null @@ -1,1206 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. 
-// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#pragma once - -#include <cstddef> -#include "common/assert.h" -#include "common/bit_set.h" -#include "common/code_block.h" -#include "common/common_types.h" - -#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64) -#define _ARCH_64 -#endif - -#ifdef _ARCH_64 -#define PTRBITS 64 -#else -#define PTRBITS 32 -#endif - -namespace Gen { - -enum X64Reg { -    EAX = 0, -    EBX = 3, -    ECX = 1, -    EDX = 2, -    ESI = 6, -    EDI = 7, -    EBP = 5, -    ESP = 4, - -    RAX = 0, -    RBX = 3, -    RCX = 1, -    RDX = 2, -    RSI = 6, -    RDI = 7, -    RBP = 5, -    RSP = 4, -    R8 = 8, -    R9 = 9, -    R10 = 10, -    R11 = 11, -    R12 = 12, -    R13 = 13, -    R14 = 14, -    R15 = 15, - -    AL = 0, -    BL = 3, -    CL = 1, -    DL = 2, -    SIL = 6, -    DIL = 7, -    BPL = 5, -    SPL = 4, -    AH = 0x104, -    BH = 0x107, -    CH = 0x105, -    DH = 0x106, - -    AX = 0, -    BX = 3, -    CX = 1, -    DX = 2, -    SI = 6, -    DI = 7, -    BP = 5, -    SP = 4, - -    XMM0 = 0, -    XMM1, -    XMM2, -    XMM3, -    XMM4, -    XMM5, -    XMM6, -    XMM7, -    XMM8, -    XMM9, -    XMM10, -    XMM11, -    XMM12, -    XMM13, -    XMM14, -    XMM15, - -    YMM0 = 0, -    YMM1, -    YMM2, -    YMM3, -    YMM4, -    YMM5, -    YMM6, -    YMM7, -    YMM8, -    YMM9, -    YMM10, -    YMM11, -    YMM12, -    YMM13, -    YMM14, -    YMM15, - -    INVALID_REG = 0xFFFFFFFF -}; - -enum CCFlags { -    CC_O = 0, -    CC_NO = 1, -    CC_B = 2, -    CC_C = 2, -    CC_NAE = 2, -    CC_NB = 3, -    CC_NC = 3, -    CC_AE = 3, -    CC_Z = 4, -    CC_E = 4, -    CC_NZ = 5, -    CC_NE = 5, -    CC_BE = 6, -    CC_NA = 6, -    CC_NBE = 7, -    CC_A = 7, -    CC_S = 8, -    CC_NS = 9, -    CC_P = 0xA, -    CC_PE = 0xA, -    CC_NP = 0xB, -    CC_PO = 0xB, -    CC_L = 0xC, -    CC_NGE = 0xC, -    CC_NL = 0xD, -    CC_GE = 0xD, -    
CC_LE = 0xE, -    CC_NG = 0xE, -    CC_NLE = 0xF, -    CC_G = 0xF -}; - -enum { -    NUMGPRs = 16, -    NUMXMMs = 16, -}; - -enum { -    SCALE_NONE = 0, -    SCALE_1 = 1, -    SCALE_2 = 2, -    SCALE_4 = 4, -    SCALE_8 = 8, -    SCALE_ATREG = 16, -    // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG -    SCALE_NOBASE_2 = 34, -    SCALE_NOBASE_4 = 36, -    SCALE_NOBASE_8 = 40, -    SCALE_RIP = 0xFF, -    SCALE_IMM8 = 0xF0, -    SCALE_IMM16 = 0xF1, -    SCALE_IMM32 = 0xF2, -    SCALE_IMM64 = 0xF3, -}; - -enum NormalOp { -    nrmADD, -    nrmADC, -    nrmSUB, -    nrmSBB, -    nrmAND, -    nrmOR, -    nrmXOR, -    nrmMOV, -    nrmTEST, -    nrmCMP, -    nrmXCHG, -}; - -enum { -    CMP_EQ = 0, -    CMP_LT = 1, -    CMP_LE = 2, -    CMP_UNORD = 3, -    CMP_NEQ = 4, -    CMP_NLT = 5, -    CMP_NLE = 6, -    CMP_ORD = 7, -}; - -enum FloatOp { -    floatLD = 0, -    floatST = 2, -    floatSTP = 3, -    floatLD80 = 5, -    floatSTP80 = 7, - -    floatINVALID = -1, -}; - -enum FloatRound { -    FROUND_NEAREST = 0, -    FROUND_FLOOR = 1, -    FROUND_CEIL = 2, -    FROUND_ZERO = 3, -    FROUND_MXCSR = 4, - -    FROUND_RAISE_PRECISION = 0, -    FROUND_IGNORE_PRECISION = 8, -}; - -class XEmitter; - -// RIP addressing does not benefit from micro op fusion on Core arch -struct OpArg { -    friend class XEmitter; - -    constexpr OpArg() = default; // dummy op arg, used for storage -    constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX) -        : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)), -          indexReg(static_cast<u16>(scaledReg)), offset(offset_) {} - -    constexpr bool operator==(const OpArg& b) const { -        return operandReg == b.operandReg && scale == b.scale && -               offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset; -    } - -    void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const; -    void 
WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, -                  int W = 0) const; -    void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG, -                   bool warn_64bit_offset = true) const; -    void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits); -    void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, -                       int bits) const; - -    constexpr bool IsImm() const { -        return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || -               scale == SCALE_IMM64; -    } -    constexpr bool IsSimpleReg() const { -        return scale == SCALE_NONE; -    } -    constexpr bool IsSimpleReg(X64Reg reg) const { -        return IsSimpleReg() && GetSimpleReg() == reg; -    } - -    int GetImmBits() const { -        switch (scale) { -        case SCALE_IMM8: -            return 8; -        case SCALE_IMM16: -            return 16; -        case SCALE_IMM32: -            return 32; -        case SCALE_IMM64: -            return 64; -        default: -            return -1; -        } -    } - -    void SetImmBits(int bits) { -        switch (bits) { -        case 8: -            scale = SCALE_IMM8; -            break; -        case 16: -            scale = SCALE_IMM16; -            break; -        case 32: -            scale = SCALE_IMM32; -            break; -        case 64: -            scale = SCALE_IMM64; -            break; -        } -    } - -    constexpr X64Reg GetSimpleReg() const { -        return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG; -    } - -    constexpr u32 GetImmValue() const { -        return static_cast<u32>(offset); -    } - -    // For loops. 
-    void IncreaseOffset(int sz) { -        offset += sz; -    } - -private: -    u8 scale = 0; -    u16 offsetOrBaseReg = 0; -    u16 indexReg = 0; -    u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available. -    u16 operandReg = 0; -}; - -template <typename T> -inline OpArg M(const T* ptr) { -    return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); -} -constexpr OpArg R(X64Reg value) { -    return OpArg(0, SCALE_NONE, value); -} -constexpr OpArg MatR(X64Reg value) { -    return OpArg(0, SCALE_ATREG, value); -} - -constexpr OpArg MDisp(X64Reg value, int offset) { -    return OpArg(static_cast<u32>(offset), SCALE_ATREG, value); -} - -constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) { -    return OpArg(offset, scale, base, scaled); -} - -constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) { -    return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled) -                            : OpArg(offset, scale | 0x20, RAX, scaled); -} - -constexpr OpArg MRegSum(X64Reg base, X64Reg offset) { -    return MComplex(base, offset, 1, 0); -} - -constexpr OpArg Imm8(u8 imm) { -    return OpArg(imm, SCALE_IMM8); -} -constexpr OpArg Imm16(u16 imm) { -    return OpArg(imm, SCALE_IMM16); -} // rarely used -constexpr OpArg Imm32(u32 imm) { -    return OpArg(imm, SCALE_IMM32); -} -constexpr OpArg Imm64(u64 imm) { -    return OpArg(imm, SCALE_IMM64); -} -constexpr OpArg UImmAuto(u32 imm) { -    return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8); -} -constexpr OpArg SImmAuto(s32 imm) { -    return OpArg(imm, (imm >= 128 || imm < -128) ? 
SCALE_IMM32 : SCALE_IMM8); -} - -template <typename T> -OpArg ImmPtr(const T* imm) { -#ifdef _ARCH_64 -    return Imm64(reinterpret_cast<u64>(imm)); -#else -    return Imm32(reinterpret_cast<u32>(imm)); -#endif -} - -inline u32 PtrOffset(const void* ptr, const void* base) { -#ifdef _ARCH_64 -    s64 distance = (s64)ptr - (s64)base; -    if (distance >= 0x80000000LL || distance < -0x80000000LL) { -        ASSERT_MSG(0, "pointer offset out of range"); -        return 0; -    } - -    return (u32)distance; -#else -    return (u32)ptr - (u32)base; -#endif -} - -// usage: int a[]; ARRAY_OFFSET(a,10) -#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0])) -// usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) - -struct FixupBranch { -    u8* ptr; -    int type; // 0 = 8bit 1 = 32bit -}; - -enum SSECompare { -    EQ = 0, -    LT, -    LE, -    UNORD, -    NEQ, -    NLT, -    NLE, -    ORD, -}; - -class XEmitter { -    friend struct OpArg; // for Write8 etc -private: -    u8* code; -    bool flags_locked; - -    void CheckFlags(); - -    void Rex(int w, int r, int x, int b); -    void WriteSimple1Byte(int bits, u8 byte, X64Reg reg); -    void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); -    void WriteMulDivType(int bits, OpArg src, int ext); -    void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); -    void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); -    void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext); -    void WriteMXCSR(OpArg arg, int ext); -    void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); -    void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); -    void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); -    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg 
regOp, const OpArg& arg, int extrabytes = 0); -    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, -                    int extrabytes = 0); -    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, -                    int extrabytes = 0); -    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, -                     int extrabytes = 0); -    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, -                     int extrabytes = 0); -    void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); -    void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); - -    void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, -                                size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); - -protected: -    void Write8(u8 value); -    void Write16(u16 value); -    void Write32(u32 value); -    void Write64(u64 value); - -public: -    XEmitter() { -        code = nullptr; -        flags_locked = false; -    } -    XEmitter(u8* code_ptr) { -        code = code_ptr; -        flags_locked = false; -    } -    virtual ~XEmitter() {} - -    void WriteModRM(int mod, int rm, int reg); -    void WriteSIB(int scale, int index, int base); - -    void SetCodePtr(u8* ptr); -    void ReserveCodeSpace(int bytes); -    const u8* AlignCode4(); -    const u8* AlignCode16(); -    const u8* AlignCodePage(); -    const u8* GetCodePtr() const; -    u8* GetWritableCodePtr(); - -    void LockFlags() { -        flags_locked = true; -    } -    void UnlockFlags() { -        flags_locked = false; -    } - -    // Looking for one of these? It's BANNED!! 
Some instructions are slow on modern CPU -    // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other -    // string instr., -    // INC and DEC are slow on Intel Core, but not on AMD. They create a -    // false flag dependency because they only update a subset of the flags. -    // XCHG is SLOW and should be avoided. - -    // Debug breakpoint -    void INT3(); - -    // Do nothing -    void NOP(size_t count = 1); - -    // Save energy in wait-loops on P4 only. Probably not too useful. -    void PAUSE(); - -    // Flag control -    void STC(); -    void CLC(); -    void CMC(); - -    // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and -    // AMD! -    void LAHF(); // 3 cycle vector path -    void SAHF(); // direct path fast - -    // Stack control -    void PUSH(X64Reg reg); -    void POP(X64Reg reg); -    void PUSH(int bits, const OpArg& reg); -    void POP(int bits, const OpArg& reg); -    void PUSHF(); -    void POPF(); - -    // Flow control -    void RET(); -    void RET_FAST(); -    void UD2(); -    FixupBranch J(bool force5bytes = false); - -    void JMP(const u8* addr, bool force5Bytes = false); -    void JMPptr(const OpArg& arg); -    void JMPself(); // infinite loop! -#ifdef CALL -#undef CALL -#endif -    void CALL(const void* fnptr); -    FixupBranch CALL(); -    void CALLptr(OpArg arg); - -    FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); -    void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); - -    void SetJumpTarget(const FixupBranch& branch); -    void SetJumpTarget(const FixupBranch& branch, const u8* target); - -    void SETcc(CCFlags flag, OpArg dest); -    // Note: CMOV brings small if any benefit on current cpus. 
-    void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag); - -    // Fences -    void LFENCE(); -    void MFENCE(); -    void SFENCE(); - -    // Bit scan -    void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit -    void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit - -    // Cache control -    enum PrefetchLevel { -        PF_NTA, // Non-temporal (data used once and only once) -        PF_T0,  // All cache levels -        PF_T1,  // Levels 2+ (aliased to T0 on AMD) -        PF_T2,  // Levels 3+ (aliased to T0 on AMD) -    }; -    void PREFETCH(PrefetchLevel level, OpArg arg); -    void MOVNTI(int bits, const OpArg& dest, X64Reg src); -    void MOVNTDQ(const OpArg& arg, X64Reg regOp); -    void MOVNTPS(const OpArg& arg, X64Reg regOp); -    void MOVNTPD(const OpArg& arg, X64Reg regOp); - -    // Multiplication / division -    void MUL(int bits, const OpArg& src);  // UNSIGNED -    void IMUL(int bits, const OpArg& src); // SIGNED -    void IMUL(int bits, X64Reg regOp, const OpArg& src); -    void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); -    void DIV(int bits, const OpArg& src); -    void IDIV(int bits, const OpArg& src); - -    // Shift -    void ROL(int bits, const OpArg& dest, const OpArg& shift); -    void ROR(int bits, const OpArg& dest, const OpArg& shift); -    void RCL(int bits, const OpArg& dest, const OpArg& shift); -    void RCR(int bits, const OpArg& dest, const OpArg& shift); -    void SHL(int bits, const OpArg& dest, const OpArg& shift); -    void SHR(int bits, const OpArg& dest, const OpArg& shift); -    void SAR(int bits, const OpArg& dest, const OpArg& shift); - -    // Bit Test -    void BT(int bits, const OpArg& dest, const OpArg& index); -    void BTS(int bits, const OpArg& dest, const OpArg& index); -    void BTR(int bits, const OpArg& dest, const OpArg& index); -    void BTC(int bits, const OpArg& dest, const OpArg& index); - -    // Double-Precision Shift -    
void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); -    void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); - -    // Extend EAX into EDX in various ways -    void CWD(int bits = 16); -    void CDQ() { -        CWD(32); -    } -    void CQO() { -        CWD(64); -    } -    void CBW(int bits = 8); -    void CWDE() { -        CBW(16); -    } -    void CDQE() { -        CBW(32); -    } - -    // Load effective address -    void LEA(int bits, X64Reg dest, OpArg src); - -    // Integer arithmetic -    void NEG(int bits, const OpArg& src); -    void ADD(int bits, const OpArg& a1, const OpArg& a2); -    void ADC(int bits, const OpArg& a1, const OpArg& a2); -    void SUB(int bits, const OpArg& a1, const OpArg& a2); -    void SBB(int bits, const OpArg& a1, const OpArg& a2); -    void AND(int bits, const OpArg& a1, const OpArg& a2); -    void CMP(int bits, const OpArg& a1, const OpArg& a2); - -    // Bit operations -    void NOT(int bits, const OpArg& src); -    void OR(int bits, const OpArg& a1, const OpArg& a2); -    void XOR(int bits, const OpArg& a1, const OpArg& a2); -    void MOV(int bits, const OpArg& a1, const OpArg& a2); -    void TEST(int bits, const OpArg& a1, const OpArg& a2); - -    // Are these useful at all? Consider removing. -    void XCHG(int bits, const OpArg& a1, const OpArg& a2); -    void XCHG_AHAL(); - -    // Byte swapping (32 and 64-bit only). -    void BSWAP(int bits, X64Reg reg); - -    // Sign/zero extension -    void MOVSX(int dbits, int sbits, X64Reg dest, -               OpArg src); // automatically uses MOVSXD if necessary -    void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); - -    // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe. 
-    void MOVBE(int dbits, const OpArg& dest, const OpArg& src); - -    // Available only on AMD >= Phenom or Intel >= Haswell -    void LZCNT(int bits, X64Reg dest, const OpArg& src); -    // Note: this one is actually part of BMI1 -    void TZCNT(int bits, X64Reg dest, const OpArg& src); - -    // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) -    void STMXCSR(const OpArg& memloc); -    void LDMXCSR(const OpArg& memloc); - -    // Prefixes -    void LOCK(); -    void REP(); -    void REPNE(); -    void FSOverride(); -    void GSOverride(); - -    // x87 -    enum x87StatusWordBits { -        x87_InvalidOperation = 0x1, -        x87_DenormalizedOperand = 0x2, -        x87_DivisionByZero = 0x4, -        x87_Overflow = 0x8, -        x87_Underflow = 0x10, -        x87_Precision = 0x20, -        x87_StackFault = 0x40, -        x87_ErrorSummary = 0x80, -        x87_C0 = 0x100, -        x87_C1 = 0x200, -        x87_C2 = 0x400, -        x87_TopOfStack = 0x2000 | 0x1000 | 0x800, -        x87_C3 = 0x4000, -        x87_FPUBusy = 0x8000, -    }; - -    void FLD(int bits, const OpArg& src); -    void FST(int bits, const OpArg& dest); -    void FSTP(int bits, const OpArg& dest); -    void FNSTSW_AX(); -    void FWAIT(); - -    // SSE/SSE2: Floating point arithmetic -    void ADDSS(X64Reg regOp, const OpArg& arg); -    void ADDSD(X64Reg regOp, const OpArg& arg); -    void SUBSS(X64Reg regOp, const OpArg& arg); -    void SUBSD(X64Reg regOp, const OpArg& arg); -    void MULSS(X64Reg regOp, const OpArg& arg); -    void MULSD(X64Reg regOp, const OpArg& arg); -    void DIVSS(X64Reg regOp, const OpArg& arg); -    void DIVSD(X64Reg regOp, const OpArg& arg); -    void MINSS(X64Reg regOp, const OpArg& arg); -    void MINSD(X64Reg regOp, const OpArg& arg); -    void MAXSS(X64Reg regOp, const OpArg& arg); -    void MAXSD(X64Reg regOp, const OpArg& arg); -    void SQRTSS(X64Reg regOp, const OpArg& arg); -    void SQRTSD(X64Reg regOp, const OpArg& arg); -    void 
RCPSS(X64Reg regOp, const OpArg& arg); -    void RSQRTSS(X64Reg regOp, const OpArg& arg); - -    // SSE/SSE2: Floating point bitwise (yes) -    void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); -    void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); - -    void CMPEQSS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_EQ); -    } -    void CMPLTSS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_LT); -    } -    void CMPLESS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_LE); -    } -    void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_UNORD); -    } -    void CMPNEQSS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_NEQ); -    } -    void CMPNLTSS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_NLT); -    } -    void CMPORDSS(X64Reg regOp, const OpArg& arg) { -        CMPSS(regOp, arg, CMP_ORD); -    } - -    // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) -    void ADDPS(X64Reg regOp, const OpArg& arg); -    void ADDPD(X64Reg regOp, const OpArg& arg); -    void SUBPS(X64Reg regOp, const OpArg& arg); -    void SUBPD(X64Reg regOp, const OpArg& arg); -    void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); -    void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); -    void MULPS(X64Reg regOp, const OpArg& arg); -    void MULPD(X64Reg regOp, const OpArg& arg); -    void DIVPS(X64Reg regOp, const OpArg& arg); -    void DIVPD(X64Reg regOp, const OpArg& arg); -    void MINPS(X64Reg regOp, const OpArg& arg); -    void MINPD(X64Reg regOp, const OpArg& arg); -    void MAXPS(X64Reg regOp, const OpArg& arg); -    void MAXPD(X64Reg regOp, const OpArg& arg); -    void SQRTPS(X64Reg regOp, const OpArg& arg); -    void SQRTPD(X64Reg regOp, const OpArg& arg); -    void RCPPS(X64Reg regOp, const OpArg& arg); -    void RSQRTPS(X64Reg regOp, const OpArg& arg); - -    // SSE/SSE2: Floating point packed bitwise (x4 for float, 
x2 for double) -    void ANDPS(X64Reg regOp, const OpArg& arg); -    void ANDPD(X64Reg regOp, const OpArg& arg); -    void ANDNPS(X64Reg regOp, const OpArg& arg); -    void ANDNPD(X64Reg regOp, const OpArg& arg); -    void ORPS(X64Reg regOp, const OpArg& arg); -    void ORPD(X64Reg regOp, const OpArg& arg); -    void XORPS(X64Reg regOp, const OpArg& arg); -    void XORPD(X64Reg regOp, const OpArg& arg); - -    // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. -    void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); -    void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); - -    // SSE/SSE2: Useful alternative to shuffle in some cases. -    void MOVDDUP(X64Reg regOp, const OpArg& arg); - -    // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily -    // on Ivy. -    void HADDPS(X64Reg dest, const OpArg& src); - -    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg -    // contains both a read mask and a write "mask". -    void DPPS(X64Reg dest, const OpArg& src, u8 arg); - -    void UNPCKLPS(X64Reg dest, const OpArg& src); -    void UNPCKHPS(X64Reg dest, const OpArg& src); -    void UNPCKLPD(X64Reg dest, const OpArg& src); -    void UNPCKHPD(X64Reg dest, const OpArg& src); - -    // SSE/SSE2: Compares. -    void COMISS(X64Reg regOp, const OpArg& arg); -    void COMISD(X64Reg regOp, const OpArg& arg); -    void UCOMISS(X64Reg regOp, const OpArg& arg); -    void UCOMISD(X64Reg regOp, const OpArg& arg); - -    // SSE/SSE2: Moves. Use the right data type for your data, in most cases. 
-    void MOVAPS(X64Reg regOp, const OpArg& arg); -    void MOVAPD(X64Reg regOp, const OpArg& arg); -    void MOVAPS(const OpArg& arg, X64Reg regOp); -    void MOVAPD(const OpArg& arg, X64Reg regOp); - -    void MOVUPS(X64Reg regOp, const OpArg& arg); -    void MOVUPD(X64Reg regOp, const OpArg& arg); -    void MOVUPS(const OpArg& arg, X64Reg regOp); -    void MOVUPD(const OpArg& arg, X64Reg regOp); - -    void MOVDQA(X64Reg regOp, const OpArg& arg); -    void MOVDQA(const OpArg& arg, X64Reg regOp); -    void MOVDQU(X64Reg regOp, const OpArg& arg); -    void MOVDQU(const OpArg& arg, X64Reg regOp); - -    void MOVSS(X64Reg regOp, const OpArg& arg); -    void MOVSD(X64Reg regOp, const OpArg& arg); -    void MOVSS(const OpArg& arg, X64Reg regOp); -    void MOVSD(const OpArg& arg, X64Reg regOp); - -    void MOVLPS(X64Reg regOp, const OpArg& arg); -    void MOVLPD(X64Reg regOp, const OpArg& arg); -    void MOVLPS(const OpArg& arg, X64Reg regOp); -    void MOVLPD(const OpArg& arg, X64Reg regOp); - -    void MOVHPS(X64Reg regOp, const OpArg& arg); -    void MOVHPD(X64Reg regOp, const OpArg& arg); -    void MOVHPS(const OpArg& arg, X64Reg regOp); -    void MOVHPD(const OpArg& arg, X64Reg regOp); - -    void MOVHLPS(X64Reg regOp1, X64Reg regOp2); -    void MOVLHPS(X64Reg regOp1, X64Reg regOp2); - -    void MOVD_xmm(X64Reg dest, const OpArg& arg); -    void MOVQ_xmm(X64Reg dest, OpArg arg); -    void MOVD_xmm(const OpArg& arg, X64Reg src); -    void MOVQ_xmm(OpArg arg, X64Reg src); - -    // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in -    // question. -    void MOVMSKPS(X64Reg dest, const OpArg& arg); -    void MOVMSKPD(X64Reg dest, const OpArg& arg); - -    // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a -    // weird one. -    void MASKMOVDQU(X64Reg dest, X64Reg src); -    void LDDQU(X64Reg dest, const OpArg& src); - -    // SSE/SSE2: Data type conversions. 
-    void CVTPS2PD(X64Reg dest, const OpArg& src); -    void CVTPD2PS(X64Reg dest, const OpArg& src); -    void CVTSS2SD(X64Reg dest, const OpArg& src); -    void CVTSI2SS(X64Reg dest, const OpArg& src); -    void CVTSD2SS(X64Reg dest, const OpArg& src); -    void CVTSI2SD(X64Reg dest, const OpArg& src); -    void CVTDQ2PD(X64Reg regOp, const OpArg& arg); -    void CVTPD2DQ(X64Reg regOp, const OpArg& arg); -    void CVTDQ2PS(X64Reg regOp, const OpArg& arg); -    void CVTPS2DQ(X64Reg regOp, const OpArg& arg); - -    void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); -    void CVTTPD2DQ(X64Reg regOp, const OpArg& arg); - -    // Destinations are X64 regs (rax, rbx, ...) for these instructions. -    void CVTSS2SI(X64Reg xregdest, const OpArg& src); -    void CVTSD2SI(X64Reg xregdest, const OpArg& src); -    void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); -    void CVTTSD2SI(X64Reg xregdest, const OpArg& arg); - -    // SSE2: Packed integer instructions -    void PACKSSDW(X64Reg dest, const OpArg& arg); -    void PACKSSWB(X64Reg dest, const OpArg& arg); -    void PACKUSDW(X64Reg dest, const OpArg& arg); -    void PACKUSWB(X64Reg dest, const OpArg& arg); - -    void PUNPCKLBW(X64Reg dest, const OpArg& arg); -    void PUNPCKLWD(X64Reg dest, const OpArg& arg); -    void PUNPCKLDQ(X64Reg dest, const OpArg& arg); -    void PUNPCKLQDQ(X64Reg dest, const OpArg& arg); - -    void PTEST(X64Reg dest, const OpArg& arg); -    void PAND(X64Reg dest, const OpArg& arg); -    void PANDN(X64Reg dest, const OpArg& arg); -    void PXOR(X64Reg dest, const OpArg& arg); -    void POR(X64Reg dest, const OpArg& arg); - -    void PADDB(X64Reg dest, const OpArg& arg); -    void PADDW(X64Reg dest, const OpArg& arg); -    void PADDD(X64Reg dest, const OpArg& arg); -    void PADDQ(X64Reg dest, const OpArg& arg); - -    void PADDSB(X64Reg dest, const OpArg& arg); -    void PADDSW(X64Reg dest, const OpArg& arg); -    void PADDUSB(X64Reg dest, const OpArg& arg); -    void PADDUSW(X64Reg dest, const 
OpArg& arg); - -    void PSUBB(X64Reg dest, const OpArg& arg); -    void PSUBW(X64Reg dest, const OpArg& arg); -    void PSUBD(X64Reg dest, const OpArg& arg); -    void PSUBQ(X64Reg dest, const OpArg& arg); - -    void PSUBSB(X64Reg dest, const OpArg& arg); -    void PSUBSW(X64Reg dest, const OpArg& arg); -    void PSUBUSB(X64Reg dest, const OpArg& arg); -    void PSUBUSW(X64Reg dest, const OpArg& arg); - -    void PAVGB(X64Reg dest, const OpArg& arg); -    void PAVGW(X64Reg dest, const OpArg& arg); - -    void PCMPEQB(X64Reg dest, const OpArg& arg); -    void PCMPEQW(X64Reg dest, const OpArg& arg); -    void PCMPEQD(X64Reg dest, const OpArg& arg); - -    void PCMPGTB(X64Reg dest, const OpArg& arg); -    void PCMPGTW(X64Reg dest, const OpArg& arg); -    void PCMPGTD(X64Reg dest, const OpArg& arg); - -    void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); -    void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); - -    void PMADDWD(X64Reg dest, const OpArg& arg); -    void PSADBW(X64Reg dest, const OpArg& arg); - -    void PMAXSW(X64Reg dest, const OpArg& arg); -    void PMAXUB(X64Reg dest, const OpArg& arg); -    void PMINSW(X64Reg dest, const OpArg& arg); -    void PMINUB(X64Reg dest, const OpArg& arg); -    // SSE4: More MAX/MIN instructions. 
-    void PMINSB(X64Reg dest, const OpArg& arg); -    void PMINSD(X64Reg dest, const OpArg& arg); -    void PMINUW(X64Reg dest, const OpArg& arg); -    void PMINUD(X64Reg dest, const OpArg& arg); -    void PMAXSB(X64Reg dest, const OpArg& arg); -    void PMAXSD(X64Reg dest, const OpArg& arg); -    void PMAXUW(X64Reg dest, const OpArg& arg); -    void PMAXUD(X64Reg dest, const OpArg& arg); - -    void PMOVMSKB(X64Reg dest, const OpArg& arg); -    void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); -    void PSHUFB(X64Reg dest, const OpArg& arg); - -    void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); -    void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle); - -    void PSRLW(X64Reg reg, int shift); -    void PSRLD(X64Reg reg, int shift); -    void PSRLQ(X64Reg reg, int shift); -    void PSRLQ(X64Reg reg, const OpArg& arg); -    void PSRLDQ(X64Reg reg, int shift); - -    void PSLLW(X64Reg reg, int shift); -    void PSLLD(X64Reg reg, int shift); -    void PSLLQ(X64Reg reg, int shift); -    void PSLLDQ(X64Reg reg, int shift); - -    void PSRAW(X64Reg reg, int shift); -    void PSRAD(X64Reg reg, int shift); - -    // SSE4: data type conversions -    void PMOVSXBW(X64Reg dest, const OpArg& arg); -    void PMOVSXBD(X64Reg dest, const OpArg& arg); -    void PMOVSXBQ(X64Reg dest, const OpArg& arg); -    void PMOVSXWD(X64Reg dest, const OpArg& arg); -    void PMOVSXWQ(X64Reg dest, const OpArg& arg); -    void PMOVSXDQ(X64Reg dest, const OpArg& arg); -    void PMOVZXBW(X64Reg dest, const OpArg& arg); -    void PMOVZXBD(X64Reg dest, const OpArg& arg); -    void PMOVZXBQ(X64Reg dest, const OpArg& arg); -    void PMOVZXWD(X64Reg dest, const OpArg& arg); -    void PMOVZXWQ(X64Reg dest, const OpArg& arg); -    void PMOVZXDQ(X64Reg dest, const OpArg& arg); - -    // SSE4: variable blend instructions (xmm0 implicit argument) -    void PBLENDVB(X64Reg dest, const OpArg& arg); -    void BLENDVPS(X64Reg dest, const OpArg& arg); -    void BLENDVPD(X64Reg dest, const 
OpArg& arg); -    void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); -    void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); - -    // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) -    void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); -    void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); -    void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); -    void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); - -    void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { -        ROUNDSS(dest, arg, FROUND_NEAREST); -    } -    void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { -        ROUNDSS(dest, arg, FROUND_FLOOR); -    } -    void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { -        ROUNDSS(dest, arg, FROUND_CEIL); -    } -    void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { -        ROUNDSS(dest, arg, FROUND_ZERO); -    } - -    void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { -        ROUNDSD(dest, arg, FROUND_NEAREST); -    } -    void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { -        ROUNDSD(dest, arg, FROUND_FLOOR); -    } -    void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { -        ROUNDSD(dest, arg, FROUND_CEIL); -    } -    void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { -        ROUNDSD(dest, arg, FROUND_ZERO); -    } - -    void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { -        ROUNDPS(dest, arg, FROUND_NEAREST); -    } -    void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { -        ROUNDPS(dest, arg, FROUND_FLOOR); -    } -    void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { -        ROUNDPS(dest, arg, FROUND_CEIL); -    } -    void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { -        ROUNDPS(dest, arg, FROUND_ZERO); -    } - -    void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { -        ROUNDPD(dest, arg, FROUND_NEAREST); -    } -    void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { -        ROUNDPD(dest, arg, FROUND_FLOOR); -    } -    void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { -      
  ROUNDPD(dest, arg, FROUND_CEIL); -    } -    void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { -        ROUNDPD(dest, arg, FROUND_ZERO); -    } - -    // AVX -    void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); -    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - -    void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - -    void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - -    // FMA3 -    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    
void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void 
VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void VFMSUBADD213PD(X64Reg regOp1, 
X64Reg regOp2, const OpArg& arg); -    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - -    // VEX GPR instructions. NOTE(review): the leading `bits` argument presumably selects the -    // operand size - confirm against emitter.cpp. -    void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); -    void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); -    void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); -    void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); -    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); -    void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); -    void BLSR(int bits, X64Reg regOp, const OpArg& arg); -    void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); -    void BLSI(int bits, X64Reg regOp, const OpArg& arg); -    void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); -    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - -    void RDTSC(); - -    // Utility functions -    // ABI_CallFunction differs from a plain CALL in that it aligns the stack -    // where appropriate. -    void ABI_CallFunction(const void* func); -    // Typed overloads: cast the function pointer to const void* and forward to the overload above. -    template <typename T> -    void ABI_CallFunction(T (*func)()) { -        ABI_CallFunction((const void*)func); -    } - -    void ABI_CallFunction(const u8* func) { -        ABI_CallFunction((const void*)func); -    } -    void ABI_CallFunctionC16(const void* func, u16 param1); -    void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); - -    // These only support u32 parameters, but that's enough for a lot of uses. -    // NOTE(review): the list below also declares pointer (P, void*) and OpArg (A) variants; the -    // suffix letters follow the parameter order, with C marking a u32 constant. -    // These will destroy the first 1 or 2 "parameter regs". 
-    void ABI_CallFunctionC(const void* func, u32 param1); -    void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); -    void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3); -    void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); -    void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); -    void ABI_CallFunctionP(const void* func, void* param1); -    void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); -    void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); -    void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); -    void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); -    void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3); -    void ABI_CallFunctionA(const void* func, const OpArg& arg1); -    void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2); - -    // R variants: pass a register as a parameter. 
-    void ABI_CallFunctionR(const void* func, X64Reg reg1); -    void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2); - -    /** Typed overload: casts func to const void* and forwards to the untyped ABI_CallFunctionC. */ -    template <typename Tr, typename T1> -    void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { -        ABI_CallFunctionC((const void*)func, param1); -    } - -    /** -     * Saves the specified registers and adjusts the stack to be 16-byte aligned, as required by -     * the ABI. -     * -     * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs) -     * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8 -     * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the -     * stack -     * @return Size of the shadow space, i.e., offset of the frame -     */ -    size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, -                                           size_t needed_frame_size = 0); - -    /** -     * Restores the specified registers and adjusts the stack to its original alignment, i.e., the -     * alignment before the matching ABI_PushRegistersAndAdjustStack. -     * -     * @param mask Registers to restore from the stack (high 16 bits are XMMs, low 16 bits are -     * GPRs) -     * @param rsp_alignment Original alignment before the matching -     * ABI_PushRegistersAndAdjustStack, must be 0 or 8 -     * @param needed_frame_size Additional space that was needed -     * @warning Stack must be currently 16-byte aligned -     */ -    void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, -                                        size_t needed_frame_size = 0); - -    /** Number of XMM registers: 8 when _M_IX86 is defined, 16 otherwise. */ -#ifdef _M_IX86 -    static int ABI_GetNumXMMRegs() { -        return 8; -    } -#else -    static int ABI_GetNumXMMRegs() { -        return 16; -    } -#endif -}; // class XEmitter - -// Everything that needs to generate X86 code should inherit from this. 
-// You get memory management for free, and you can call the emitter functions (MOV etc.) directly, -// without having to prefix them with gen-> or something similar. - -class XCodeBlock : public CodeBlock<XEmitter> { -public: -    // Overrides the PoisonMemory() virtual inherited from CodeBlock<XEmitter>. -    void PoisonMemory() override; -}; - -} // namespace | 
