diff options
42 files changed, 1870 insertions, 1222 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 00d71dbdc..6cdac1177 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,7 +155,8 @@ IF (APPLE)      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")  ELSEIF(MINGW)      # GCC does not support codecvt, so use iconv instead -    set(PLATFORM_LIBRARIES winmm ws2_32 iconv) +    # PSAPI is the Process Status API +    set(PLATFORM_LIBRARIES winmm ws2_32 psapi iconv)      # WSAPoll functionality doesn't exist before WinNT 6.x (Vista and up)      add_definitions(-D_WIN32_WINNT=0x0600) diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index a1a4865bd..8bf2a3e13 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -287,6 +287,17 @@ void GMainWindow::ShutdownGame() {      render_window->hide();  } +void GMainWindow::StoreRecentFile(const QString& filename) +{ +    QSettings settings; +    QStringList recent_files = settings.value("recentFiles").toStringList(); +    recent_files.prepend(filename); +    recent_files.removeDuplicates(); +    settings.setValue("recentFiles", recent_files); + +    UpdateRecentFiles(); +} +  void GMainWindow::UpdateRecentFiles() {      QSettings settings;      QStringList recent_files = settings.value("recentFiles").toStringList(); @@ -297,6 +308,7 @@ void GMainWindow::UpdateRecentFiles() {          QString text = QString("&%1. 
%2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName());          actions_recent_files[i]->setText(text);          actions_recent_files[i]->setData(recent_files[i]); +        actions_recent_files[i]->setToolTip(recent_files[i]);          actions_recent_files[i]->setVisible(true);      } @@ -319,11 +331,7 @@ void GMainWindow::OnMenuLoadFile() {      QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)"));      if (filename.size()) {          settings.setValue("romsPath", QFileInfo(filename).path()); -        // Update recent files list -        QStringList recent_files = settings.value("recentFiles").toStringList(); -        recent_files.prepend(filename); -        settings.setValue("recentFiles", recent_files); -        UpdateRecentFiles(); // Update UI +        StoreRecentFile(filename);          BootGame(filename.toLatin1().data());      } @@ -349,6 +357,7 @@ void GMainWindow::OnMenuRecentFile() {      QFileInfo file_info(filename);      if (file_info.exists()) {          BootGame(filename.toLatin1().data()); +        StoreRecentFile(filename); // Put the filename on top of the list      } else {          // Display an error message and remove the file from the list.          
QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename)); @@ -357,12 +366,7 @@ void GMainWindow::OnMenuRecentFile() {          QStringList recent_files = settings.value("recentFiles").toStringList();          recent_files.removeOne(filename);          settings.setValue("recentFiles", recent_files); - -        action->setVisible(false); -        // Grey out the recent files menu if the list is empty -        if (ui.menu_recent_files->isEmpty()) { -            ui.menu_recent_files->setEnabled(false); -        } +        UpdateRecentFiles();      }  } diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h index 4b260ae8b..6f1292295 100644 --- a/src/citra_qt/main.h +++ b/src/citra_qt/main.h @@ -60,6 +60,24 @@ private:      void BootGame(const std::string& filename);      void ShutdownGame(); +    /** +     * Stores the filename in the recently loaded files list. +     * The new filename is stored at the beginning of the recently loaded files list. +     * After inserting the new entry, duplicates are removed meaning that if +     * this was inserted from \a OnMenuRecentFile(), the entry will be put on top +     * and remove from its previous position. +     * +     * Finally, this function calls \a UpdateRecentFiles() to update the UI. +     * +     * @param filename the filename to store +     */ +    void StoreRecentFile(const QString& filename); + +    /** +     * Updates the recent files menu. +     * Menu entries are rebuilt from the configuration file. +     * If there is no entry in the menu, the menu is greyed out. 
+     */      void UpdateRecentFiles();      void closeEvent(QCloseEvent* event) override; diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 88e452a16..ed20c3629 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -45,14 +45,20 @@  // GCC 4.8 defines all the rotate functions now  // Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit -#ifndef _rotl -inline u32 _rotl(u32 x, int shift) { +#ifdef _rotl +#define rotl _rotl +#else +inline u32 rotl(u32 x, int shift) {      shift &= 31;      if (!shift) return x;      return (x << shift) | (x >> (32 - shift));  } +#endif -inline u32 _rotr(u32 x, int shift) { +#ifdef _rotr +#define rotr _rotr +#else +inline u32 rotr(u32 x, int shift) {      shift &= 31;      if (!shift) return x;      return (x >> shift) | (x << (32 - shift)); diff --git a/src/common/file_util.h b/src/common/file_util.h index d0dccdf69..e71a9b2fa 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h @@ -244,7 +244,7 @@ private:  template <typename T>  void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode)  { -#ifdef _WIN32 +#ifdef _MSC_VER      fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode);  #else      fstream.open(filename.c_str(), openmode); diff --git a/src/common/logging/log.h b/src/common/logging/log.h index e16dde7fc..5fd3bd7f5 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -91,17 +91,16 @@ void LogMessage(Class log_class, Level log_level,  } // namespace Log  #define LOG_GENERIC(log_class, log_level, ...) \ -    ::Log::LogMessage(::Log::Class::log_class, ::Log::Level::log_level, \ -        __FILE__, __LINE__, __func__, __VA_ARGS__) +    ::Log::LogMessage(log_class, log_level, __FILE__, __LINE__, __func__, __VA_ARGS__)  #ifdef _DEBUG -#define LOG_TRACE(   log_class, ...) LOG_GENERIC(log_class, Trace,    __VA_ARGS__) +#define LOG_TRACE(   log_class, ...) 
LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Trace,    __VA_ARGS__)  #else  #define LOG_TRACE(   log_class, ...) (void(0))  #endif -#define LOG_DEBUG(   log_class, ...) LOG_GENERIC(log_class, Debug,    __VA_ARGS__) -#define LOG_INFO(    log_class, ...) LOG_GENERIC(log_class, Info,     __VA_ARGS__) -#define LOG_WARNING( log_class, ...) LOG_GENERIC(log_class, Warning,  __VA_ARGS__) -#define LOG_ERROR(   log_class, ...) LOG_GENERIC(log_class, Error,    __VA_ARGS__) -#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(log_class, Critical, __VA_ARGS__) +#define LOG_DEBUG(   log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Debug,    __VA_ARGS__) +#define LOG_INFO(    log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Info,     __VA_ARGS__) +#define LOG_WARNING( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Warning,  __VA_ARGS__) +#define LOG_ERROR(   log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Error,    __VA_ARGS__) +#define LOG_CRITICAL(log_class, ...) 
LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Critical, __VA_ARGS__) diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 4b79acd1f..939df210e 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp @@ -15,6 +15,7 @@  // Official SVN repository and contact information can be found at  // http://code.google.com/p/dolphin-emu/ +#include <cinttypes>  #include <cstring>  #include "common/assert.h" @@ -25,11 +26,6 @@  #include "cpu_detect.h"  #include "emitter.h" -#define PRIx64 "llx" - -// Minimize the diff against Dolphin -#define DYNA_REC JIT -  namespace Gen  { @@ -113,6 +109,29 @@ u8 *XEmitter::GetWritableCodePtr()      return code;  } +void XEmitter::Write8(u8 value) +{ +    *code++ = value; +} + +void XEmitter::Write16(u16 value) +{ +    std::memcpy(code, &value, sizeof(u16)); +    code += sizeof(u16); +} + +void XEmitter::Write32(u32 value) +{ +    std::memcpy(code, &value, sizeof(u32)); +    code += sizeof(u32); +} + +void XEmitter::Write64(u64 value) +{ +    std::memcpy(code, &value, sizeof(u64)); +    code += sizeof(u64); +} +  void XEmitter::ReserveCodeSpace(int bytes)  {      for (int i = 0; i < bytes; i++) @@ -374,7 +393,7 @@ void XEmitter::Rex(int w, int r, int x, int b)          Write8(rx);  } -void XEmitter::JMP(const u8 *addr, bool force5Bytes) +void XEmitter::JMP(const u8* addr, bool force5Bytes)  {      u64 fn = (u64)addr;      if (!force5Bytes) @@ -398,7 +417,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes)      }  } -void XEmitter::JMPptr(const OpArg &arg2) +void XEmitter::JMPptr(const OpArg& arg2)  {      OpArg arg = arg2;      if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); @@ -425,7 +444,7 @@ void XEmitter::CALLptr(OpArg arg)      arg.WriteRest(this);  } -void XEmitter::CALL(const void *fnptr) +void XEmitter::CALL(const void* fnptr)  {      u64 distance = u64(fnptr) - (u64(code) + 5);      ASSERT_MSG( @@ -496,7 +515,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool 
force5bytes)      }  } -void XEmitter::SetJumpTarget(const FixupBranch &branch) +void XEmitter::SetJumpTarget(const FixupBranch& branch)  {      if (branch.type == 0)      { @@ -512,30 +531,6 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch)      }  } -// INC/DEC considered harmful on newer CPUs due to partial flag set. -// Use ADD, SUB instead. - -/* -void XEmitter::INC(int bits, OpArg arg) -{ -    if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument"); -    arg.operandReg = 0; -    if (bits == 16) {Write8(0x66);} -    arg.WriteRex(this, bits, bits); -    Write8(bits == 8 ? 0xFE : 0xFF); -    arg.WriteRest(this); -} -void XEmitter::DEC(int bits, OpArg arg) -{ -    if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument"); -    arg.operandReg = 1; -    if (bits == 16) {Write8(0x66);} -    arg.WriteRex(this, bits, bits); -    Write8(bits == 8 ? 0xFE : 0xFF); -    arg.WriteRest(this); -} -*/ -  //Single byte opcodes  //There is no PUSHAD/POPAD in 64-bit mode.  void XEmitter::INT3() {Write8(0xCC);} @@ -667,7 +662,7 @@ void XEmitter::CBW(int bits)  void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}  void XEmitter::POP(X64Reg reg)  {WriteSimple1Byte(32, 0x58, reg);} -void XEmitter::PUSH(int bits, const OpArg &reg) +void XEmitter::PUSH(int bits, const OpArg& reg)  {      if (reg.IsSimpleReg())          PUSH(reg.GetSimpleReg()); @@ -703,7 +698,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg)      }  } -void XEmitter::POP(int /*bits*/, const OpArg &reg) +void XEmitter::POP(int /*bits*/, const OpArg& reg)  {      if (reg.IsSimpleReg())          POP(reg.GetSimpleReg()); @@ -791,12 +786,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)      src.WriteRest(this);  } -void XEmitter::MUL(int bits, OpArg src)  {WriteMulDivType(bits, src, 4);} -void XEmitter::DIV(int bits, OpArg src)  {WriteMulDivType(bits, src, 6);} -void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} -void XEmitter::IDIV(int bits, OpArg src) 
{WriteMulDivType(bits, src, 7);} -void XEmitter::NEG(int bits, OpArg src)  {WriteMulDivType(bits, src, 3);} -void XEmitter::NOT(int bits, OpArg src)  {WriteMulDivType(bits, src, 2);} +void XEmitter::MUL(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 4);} +void XEmitter::DIV(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 6);} +void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);} +void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);} +void XEmitter::NEG(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 3);} +void XEmitter::NOT(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 2);}  void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)  { @@ -813,24 +808,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo      src.WriteRest(this);  } -void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) +void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src)  {      if (bits <= 16)          ASSERT_MSG(0, "MOVNTI - bits<=16");      WriteBitSearchType(bits, src, dest, 0xC3);  } -void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit -void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit +void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit +void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit -void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) +void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src)  {      CheckFlags();      if (!Common::GetCPUCaps().bmi1)          ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. 
Bad programmer.");      WriteBitSearchType(bits, dest, src, 0xBC, true);  } -void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) +void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src)  {      CheckFlags();      if (!Common::GetCPUCaps().lzcnt) @@ -950,7 +945,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src)  }  //shift can be either imm8 or cl -void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) +void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext)  {      CheckFlags();      bool writeImm = false; @@ -991,16 +986,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)  // large rotates and shift are slower on intel than amd  // intel likes to rotate by 1, and the op is smaller too -void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} -void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} -void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} -void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} -void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} -void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} -void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} +void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);} +void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);} +void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);} +void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);} +void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);} +void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& 
shift) {WriteShift(bits, dest, shift, 5);} +void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);}  // index can be either imm8 or register, don't use memory destination because it's slow -void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) +void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext)  {      CheckFlags();      if (dest.IsImm()) @@ -1029,13 +1024,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)      }  } -void XEmitter::BT(int bits, OpArg dest, OpArg index)  {WriteBitTest(bits, dest, index, 4);} -void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} -void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} -void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} +void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index)  {WriteBitTest(bits, dest, index, 4);} +void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);} +void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);} +void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);}  //shift can be either imm8 or cl -void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) +void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)  {      CheckFlags();      if (dest.IsImm()) @@ -1067,7 +1062,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)      }  } -void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) +void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)  {      CheckFlags();      if (dest.IsImm()) @@ -1111,7 +1106,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit  }  
//operand can either be immediate or register -void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const +void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const  {      X64Reg _operandReg;      if (IsImm()) @@ -1257,7 +1252,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o      }  } -void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2) +void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2)  {      if (a1.IsImm())      { @@ -1283,24 +1278,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg      }  } -void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} -void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} -void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} -void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} -void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} -void XEmitter::OR  (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} -void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} -void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) +void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} +void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} +void XEmitter::SUB (int bits, const OpArg& a1, 
const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} +void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} +void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} +void XEmitter::OR  (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} +void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} +void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2)  {      if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())          LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);      WriteNormalOp(this, bits, nrmMOV, a1, a2);  } -void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} -void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} -void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} +void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} +void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} +void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} -void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) +void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)  {      CheckFlags();      if (bits == 8) @@ -1353,7 +1348,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)      }  } -void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) +void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a)  {      CheckFlags();      if 
(bits == 8) @@ -1390,7 +1385,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr      arg.WriteRest(this, extrabytes);  } -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)  {      WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);  } @@ -1400,25 +1395,25 @@ static int GetVEXmmmmm(u16 op)      // Currently, only 0x38 and 0x3A are used as secondary escape byte.      if ((op >> 8) == 0x3A)          return 3; -    else if ((op >> 8) == 0x38) +    if ((op >> 8) == 0x38)          return 2; -    else -        return 1; + +    return 1;  }  static int GetVEXpp(u8 opPrefix)  {      if (opPrefix == 0x66)          return 1; -    else if (opPrefix == 0xF3) +    if (opPrefix == 0xF3)          return 2; -    else if (opPrefix == 0xF2) +    if (opPrefix == 0xF2)          return 3; -    else -        return 0; + +    return 0;  } -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)  {      if (!Common::GetCPUCaps().avx)          ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. 
Bad programmer."); @@ -1431,7 +1426,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA  }  // Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 -void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)  {      if (size != 32 && size != 64)          ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); @@ -1442,7 +1437,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r      arg.WriteRest(this, extrabytes, regOp1);  } -void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)  {      CheckFlags();      if (!Common::GetCPUCaps().bmi1) @@ -1450,7 +1445,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg      WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);  } -void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)  {      CheckFlags();      if (!Common::GetCPUCaps().bmi2) @@ -1517,135 +1512,136 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext)      arg.WriteRest(this);  } -void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} -void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} - -void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} -void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} -void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} - 
-void XEmitter::ADDSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseADD, regOp, arg);} -void XEmitter::ADDSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseADD, regOp, arg);} -void XEmitter::SUBSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseSUB, regOp, arg);} -void XEmitter::SUBSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseSUB, regOp, arg);} -void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::MULSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMUL, regOp, arg);} -void XEmitter::MULSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMUL, regOp, arg);} -void XEmitter::DIVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseDIV, regOp, arg);} -void XEmitter::DIVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseDIV, regOp, arg);} -void XEmitter::MINSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMIN, regOp, arg);} -void XEmitter::MINSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMIN, regOp, arg);} -void XEmitter::MAXSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMAX, regOp, arg);} -void XEmitter::MAXSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMAX, regOp, arg);} -void XEmitter::SQRTSS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} -void XEmitter::SQRTSD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} -void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} - -void XEmitter::ADDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseADD, regOp, arg);} -void XEmitter::ADDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseADD, regOp, arg);} -void XEmitter::SUBPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseSUB, regOp, arg);} -void XEmitter::SUBPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseSUB, regOp, arg);} -void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare)   
{WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::ANDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseAND, regOp, arg);} -void XEmitter::ANDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseAND, regOp, arg);} -void XEmitter::ANDNPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseANDN, regOp, arg);} -void XEmitter::ANDNPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseANDN, regOp, arg);} -void XEmitter::ORPS(X64Reg regOp, OpArg arg)    {WriteSSEOp(0x00, sseOR, regOp, arg);} -void XEmitter::ORPD(X64Reg regOp, OpArg arg)    {WriteSSEOp(0x66, sseOR, regOp, arg);} -void XEmitter::XORPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseXOR, regOp, arg);} -void XEmitter::XORPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseXOR, regOp, arg);} -void XEmitter::MULPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMUL, regOp, arg);} -void XEmitter::MULPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMUL, regOp, arg);} -void XEmitter::DIVPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseDIV, regOp, arg);} -void XEmitter::DIVPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseDIV, regOp, arg);} -void XEmitter::MINPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMIN, regOp, arg);} -void XEmitter::MINPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMIN, regOp, arg);} -void XEmitter::MAXPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMAX, regOp, arg);} -void XEmitter::MAXPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMAX, regOp, arg);} -void XEmitter::SQRTPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseSQRT, regOp, arg);} -void XEmitter::SQRTPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseSQRT, regOp, arg);} -void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } -void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} -void XEmitter::SHUFPS(X64Reg 
regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} -void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} - -void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} - -void XEmitter::COMISS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered -void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered -void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} - -void XEmitter::MOVAPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} -void XEmitter::MOVAPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} - -void XEmitter::MOVUPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVUPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVDQA(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQA(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} -void XEmitter::MOVDQU(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQU(OpArg arg, X64Reg regOp)  {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} - -void XEmitter::MOVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} -void 
XEmitter::MOVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSS(OpArg arg, X64Reg regOp)   {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVSD(OpArg arg, X64Reg regOp)   {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVLPS(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPD(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPS(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } -void XEmitter::MOVLPD(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } - -void XEmitter::MOVHPS(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPD(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPS(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } -void XEmitter::MOVHPD(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } +void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);} +void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);} + +void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} +void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} +void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} + +void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseADD, regOp, arg);} +void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseADD, regOp, arg);} +void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseSUB, regOp, arg);} +void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseSUB, regOp, arg);} +void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0xF3, sseCMP, regOp, 
arg, 1); Write8(compare);} +void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::MULSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMUL, regOp, arg);} +void XEmitter::MULSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMUL, regOp, arg);} +void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseDIV, regOp, arg);} +void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseDIV, regOp, arg);} +void XEmitter::MINSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMIN, regOp, arg);} +void XEmitter::MINSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMIN, regOp, arg);} +void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMAX, regOp, arg);} +void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMAX, regOp, arg);} +void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} +void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} +void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseRCP, regOp, arg);} +void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} + +void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseADD, regOp, arg);} +void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseADD, regOp, arg);} +void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseSUB, regOp, arg);} +void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseSUB, regOp, arg);} +void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::ANDPS(X64Reg regOp, const 
OpArg& arg)   {WriteSSEOp(0x00, sseAND, regOp, arg);} +void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseAND, regOp, arg);} +void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseANDN, regOp, arg);} +void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseANDN, regOp, arg);} +void XEmitter::ORPS(X64Reg regOp, const OpArg& arg)    {WriteSSEOp(0x00, sseOR, regOp, arg);} +void XEmitter::ORPD(X64Reg regOp, const OpArg& arg)    {WriteSSEOp(0x66, sseOR, regOp, arg);} +void XEmitter::XORPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseXOR, regOp, arg);} +void XEmitter::XORPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseXOR, regOp, arg);} +void XEmitter::MULPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseMUL, regOp, arg);} +void XEmitter::MULPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseMUL, regOp, arg);} +void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseDIV, regOp, arg);} +void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseDIV, regOp, arg);} +void XEmitter::MINPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseMIN, regOp, arg);} +void XEmitter::MINPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseMIN, regOp, arg);} +void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseMAX, regOp, arg);} +void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseMAX, regOp, arg);} +void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseSQRT, regOp, arg);} +void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseSQRT, regOp, arg);} +void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } +void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} +void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); 
Write8(shuffle);} +void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} + +void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} + +void XEmitter::COMISS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed +void XEmitter::COMISD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered +void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered +void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} + +void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} +void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} + +void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} + +void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} +void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} + +void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg)   
{WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp)   {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp)   {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} + +void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } +void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } +void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } +void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } + +void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } +void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } +void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } +void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }  void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}  void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} -void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} -void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} +void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} +void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} -void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} -void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} -void XEmitter::CVTSD2SI(X64Reg regOp, OpArg 
arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} -void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} -void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} -void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} +void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} +void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} +void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} +void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} +void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} +void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} -void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} -void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} -void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} -void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} +void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} +void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} +void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} +void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} -void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} -void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} -void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} -void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} +void XEmitter::CVTTSD2SI(X64Reg 
regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} +void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} +void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} +void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}  void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src)  {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} -void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} -void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} +void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);} +void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);} -void XEmitter::LDDQU(X64Reg dest, OpArg arg)    {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only +void XEmitter::LDDQU(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only  // THESE TWO ARE UNTESTED. 
-void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} -void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} +void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} +void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} -void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} -void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} +void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} +void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} -void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) +void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)  {      if (Common::GetCPUCaps().sse3)      { @@ -1663,9 +1659,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)  //There are a few more left  // Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} -void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);} -void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} +void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} +void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);} +void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);}  void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}  void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} @@ -1690,7 +1686,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift)      Write8(shift);  } -void XEmitter::PSRLQ(X64Reg reg, OpArg arg) +void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg)  {      WriteSSEOp(0x66, 0xd3, reg, arg);  } @@ -1735,212 +1731,212 @@ void 
XEmitter::PSRAD(X64Reg reg, int shift)      Write8(shift);  } -void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)  {      if (!Common::GetCPUCaps().ssse3)          ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");      WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);  } -void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)  {      if (!Common::GetCPUCaps().sse4_1)          ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");      WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);  } -void XEmitter::PSHUFB(X64Reg dest, OpArg arg)   {WriteSSSE3Op(0x66, 0x3800, dest, arg);} -void XEmitter::PTEST(X64Reg dest, OpArg arg)    {WriteSSE41Op(0x66, 0x3817, dest, arg);} -void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} -void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} - -void XEmitter::PMINSB(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x3838, dest, arg);} -void XEmitter::PMINSD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x3839, dest, arg);} -void XEmitter::PMINUW(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383a, dest, arg);} -void XEmitter::PMINUD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383b, dest, arg);} -void XEmitter::PMAXSB(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383c, dest, arg);} -void XEmitter::PMAXSD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383d, dest, arg);} -void XEmitter::PMAXUW(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383e, dest, arg);} -void XEmitter::PMAXUD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383f, dest, arg);} - -void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) 
{WriteSSE41Op(0x66, 0x3820, dest, arg);} -void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} -void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} -void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} -void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} -void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} -void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} -void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} -void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} -void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} -void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} -void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} - -void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} -void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} -void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} +void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg)   {WriteSSSE3Op(0x66, 0x3800, dest, arg);} +void XEmitter::PTEST(X64Reg dest, const OpArg& arg)    {WriteSSE41Op(0x66, 0x3817, dest, arg);} +void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} +void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} + +void XEmitter::PMINSB(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x3838, dest, arg);} +void XEmitter::PMINSD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x3839, dest, arg);} +void XEmitter::PMINUW(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383a, dest, 
arg);} +void XEmitter::PMINUD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383b, dest, arg);} +void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383c, dest, arg);} +void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383d, dest, arg);} +void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383e, dest, arg);} +void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383f, dest, arg);} + +void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} +void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} +void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} +void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} +void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} +void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} +void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} +void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} +void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} +void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} +void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} +void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} + +void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} +void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} +void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}  void XEmitter::BLENDPS(X64Reg dest, const 
OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }  void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } -void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} -void XEmitter::PAND(X64Reg dest, OpArg arg)     {WriteSSEOp(0x66, 0xDB, dest, arg);} -void XEmitter::PANDN(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xDF, dest, arg);} -void XEmitter::PXOR(X64Reg dest, OpArg arg)     {WriteSSEOp(0x66, 0xEF, dest, arg);} -void XEmitter::POR(X64Reg dest, OpArg arg)      {WriteSSEOp(0x66, 0xEB, dest, arg);} +void XEmitter::PAND(X64Reg dest, const OpArg& arg)     {WriteSSEOp(0x66, 0xDB, dest, arg);} +void XEmitter::PANDN(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xDF, dest, arg);} +void XEmitter::PXOR(X64Reg dest, const OpArg& arg)     {WriteSSEOp(0x66, 0xEF, dest, arg);} +void XEmitter::POR(X64Reg dest, const OpArg& arg)      {WriteSSEOp(0x66, 0xEB, dest, arg);} -void XEmitter::PADDB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFC, dest, arg);} -void XEmitter::PADDW(X64Reg dest, OpArg 
arg)    {WriteSSEOp(0x66, 0xFD, dest, arg);} -void XEmitter::PADDD(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFE, dest, arg);} -void XEmitter::PADDQ(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xD4, dest, arg);} +void XEmitter::PADDB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFC, dest, arg);} +void XEmitter::PADDW(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFD, dest, arg);} +void XEmitter::PADDD(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFE, dest, arg);} +void XEmitter::PADDQ(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xD4, dest, arg);} -void XEmitter::PADDSB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xEC, dest, arg);} -void XEmitter::PADDSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xED, dest, arg);} -void XEmitter::PADDUSB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xDC, dest, arg);} -void XEmitter::PADDUSW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xDD, dest, arg);} +void XEmitter::PADDSB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xEC, dest, arg);} +void XEmitter::PADDSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xED, dest, arg);} +void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xDC, dest, arg);} +void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xDD, dest, arg);} -void XEmitter::PSUBB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xF8, dest, arg);} -void XEmitter::PSUBW(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xF9, dest, arg);} -void XEmitter::PSUBD(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFA, dest, arg);} -void XEmitter::PSUBQ(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFB, dest, arg);} +void XEmitter::PSUBB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xF8, dest, arg);} +void XEmitter::PSUBW(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xF9, dest, arg);} +void XEmitter::PSUBD(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFA, dest, arg);} +void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg)    
{WriteSSEOp(0x66, 0xFB, dest, arg);} -void XEmitter::PSUBSB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xE8, dest, arg);} -void XEmitter::PSUBSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xE9, dest, arg);} -void XEmitter::PSUBUSB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xD8, dest, arg);} -void XEmitter::PSUBUSW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xD9, dest, arg);} +void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xE8, dest, arg);} +void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xE9, dest, arg);} +void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xD8, dest, arg);} +void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xD9, dest, arg);} -void XEmitter::PAVGB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xE0, dest, arg);} -void XEmitter::PAVGW(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xE3, dest, arg);} +void XEmitter::PAVGB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xE0, dest, arg);} +void XEmitter::PAVGW(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xE3, dest, arg);} -void XEmitter::PCMPEQB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x74, dest, arg);} -void XEmitter::PCMPEQW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x75, dest, arg);} -void XEmitter::PCMPEQD(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x76, dest, arg);} +void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x74, dest, arg);} +void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x75, dest, arg);} +void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x76, dest, arg);} -void XEmitter::PCMPGTB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x64, dest, arg);} -void XEmitter::PCMPGTW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x65, dest, arg);} -void XEmitter::PCMPGTD(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x66, dest, arg);} +void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x64, dest, arg);} 
+void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x65, dest, arg);} +void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x66, dest, arg);} -void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg)    {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} -void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg)    {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} +void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg)    {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} +void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg)    {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} -void XEmitter::PMADDWD(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xF5, dest, arg); } -void XEmitter::PSADBW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xF6, dest, arg);} +void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xF5, dest, arg); } +void XEmitter::PSADBW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xF6, dest, arg);} -void XEmitter::PMAXSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xEE, dest, arg); } -void XEmitter::PMAXUB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xDE, dest, arg); } -void XEmitter::PMINSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xEA, dest, arg); } -void XEmitter::PMINUB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xDA, dest, arg); } +void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xEE, dest, arg); } +void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xDE, dest, arg); } +void XEmitter::PMINSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xEA, dest, arg); } +void XEmitter::PMINUB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xDA, dest, arg); } -void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xD7, dest, arg); } -void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle)    {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFLW(X64Reg 
regOp, OpArg arg, u8 shuffle)   {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle)   {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xD7, dest, arg); } +void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle)    {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle)   {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle)   {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}  // VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)  {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} -void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} -void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} 
-void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} - -void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)  { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)  { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } -void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } -void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } -void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } - -void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } -void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } -void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg)     { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } -void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } - -void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } -void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } -void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } -void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, 
arg, 1); } -void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } -void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } -void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } -void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, 
arg); } -void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 
1); } -void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } 
-void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } - -void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate)      {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} -void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} -void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) 
{WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} -void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg)               {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} -void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} -void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} +void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)  {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} +void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} +void XEmitter::VUNPCKLPD(X64Reg 
regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} +void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} + +void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } +void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } +void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)  { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } +void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)  { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } +void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } +void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } +void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } +void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } + +void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } +void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } +void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)     { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } +void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } + +void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } +void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } +void XEmitter::VFMADD231PS(X64Reg regOp1, 
X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } +void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } +void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } +void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } +void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } +void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } +void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } +void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const 
OpArg& arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } +void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } +void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } +void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } +void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } +void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } +void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } +void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } +void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { 
WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } +void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BF, 
regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } + +void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, 
regOp2, arg);} +void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate)      {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} +void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} +void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} +void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg)               {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} +void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} +void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}  // Prefixes @@ -1956,7 +1952,7 @@ void XEmitter::FWAIT()  }  // TODO: make this more generic -void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) +void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg)  {      int mf = 0;      ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); @@ -1974,9 +1970,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a      arg.WriteRest(this, 0, (X64Reg) op);  } -void XEmitter::FLD(int bits, OpArg 
src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} -void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} -void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} +void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} +void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} +void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}  void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }  void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index e9c924126..a49cd2cf1 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -328,8 +328,6 @@ enum SSECompare      ORD,  }; -typedef const u8* JumpTarget; -  class XEmitter  {      friend struct OpArg;  // for Write8 etc @@ -344,27 +342,27 @@ private:      void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);      void WriteMulDivType(int bits, OpArg src, int ext);      void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); -    void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); -    void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); +    void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); +    void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);      void WriteMXCSR(OpArg arg, int ext);      void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); -    void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); -    void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); -    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); -    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, 
X64Reg regOp2, OpArg arg, int extrabytes = 0); -    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); -    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); -    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); -    void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); -    void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); +    void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); +    void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); +    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); +    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); +    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); +    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); +    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); +    void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); +    void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);      void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);  protected: -    inline void Write8(u8 value)   {*code++ = value;} -    inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} -    inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} -    inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} +    void Write8(u8 value); +    void 
Write16(u16 value); +    void Write32(u32 value); +    void Write64(u64 value);  public:      XEmitter() { code = nullptr; flags_locked = false; } @@ -413,8 +411,8 @@ public:      // Stack control      void PUSH(X64Reg reg);      void POP(X64Reg reg); -    void PUSH(int bits, const OpArg &reg); -    void POP(int bits, const OpArg &reg); +    void PUSH(int bits, const OpArg& reg); +    void POP(int bits, const OpArg& reg);      void PUSHF();      void POPF(); @@ -424,21 +422,19 @@ public:      void UD2();      FixupBranch J(bool force5bytes = false); -    void JMP(const u8 * addr, bool force5Bytes = false); -    void JMP(OpArg arg); -    void JMPptr(const OpArg &arg); +    void JMP(const u8* addr, bool force5Bytes = false); +    void JMPptr(const OpArg& arg);      void JMPself(); //infinite loop!  #ifdef CALL  #undef CALL  #endif -    void CALL(const void *fnptr); +    void CALL(const void* fnptr);      void CALLptr(OpArg arg);      FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); -    //void J_CC(CCFlags conditionCode, JumpTarget target); -    void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); +    void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); -    void SetJumpTarget(const FixupBranch &branch); +    void SetJumpTarget(const FixupBranch& branch);      void SETcc(CCFlags flag, OpArg dest);      // Note: CMOV brings small if any benefit on current cpus. 
@@ -450,8 +446,8 @@ public:      void SFENCE();      // Bit scan -    void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit -    void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit +    void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit +    void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit      // Cache control      enum PrefetchLevel @@ -462,67 +458,67 @@ public:          PF_T2,  //Levels 3+ (aliased to T0 on AMD)      };      void PREFETCH(PrefetchLevel level, OpArg arg); -    void MOVNTI(int bits, OpArg dest, X64Reg src); -    void MOVNTDQ(OpArg arg, X64Reg regOp); -    void MOVNTPS(OpArg arg, X64Reg regOp); -    void MOVNTPD(OpArg arg, X64Reg regOp); +    void MOVNTI(int bits, const OpArg& dest, X64Reg src); +    void MOVNTDQ(const OpArg& arg, X64Reg regOp); +    void MOVNTPS(const OpArg& arg, X64Reg regOp); +    void MOVNTPD(const OpArg& arg, X64Reg regOp);      // Multiplication / division -    void MUL(int bits, OpArg src); //UNSIGNED -    void IMUL(int bits, OpArg src); //SIGNED -    void IMUL(int bits, X64Reg regOp, OpArg src); -    void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); -    void DIV(int bits, OpArg src); -    void IDIV(int bits, OpArg src); +    void MUL(int bits, const OpArg& src); //UNSIGNED +    void IMUL(int bits, const OpArg& src); //SIGNED +    void IMUL(int bits, X64Reg regOp, const OpArg& src); +    void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); +    void DIV(int bits, const OpArg& src); +    void IDIV(int bits, const OpArg& src);      // Shift -    void ROL(int bits, OpArg dest, OpArg shift); -    void ROR(int bits, OpArg dest, OpArg shift); -    void RCL(int bits, OpArg dest, OpArg shift); -    void RCR(int bits, OpArg dest, OpArg shift); -    void SHL(int bits, OpArg dest, OpArg shift); -    void SHR(int bits, OpArg dest, OpArg shift); -    void SAR(int bits, OpArg dest, OpArg shift); +    void ROL(int bits, const OpArg& 
dest, const OpArg& shift); +    void ROR(int bits, const OpArg& dest, const OpArg& shift); +    void RCL(int bits, const OpArg& dest, const OpArg& shift); +    void RCR(int bits, const OpArg& dest, const OpArg& shift); +    void SHL(int bits, const OpArg& dest, const OpArg& shift); +    void SHR(int bits, const OpArg& dest, const OpArg& shift); +    void SAR(int bits, const OpArg& dest, const OpArg& shift);      // Bit Test -    void BT(int bits, OpArg dest, OpArg index); -    void BTS(int bits, OpArg dest, OpArg index); -    void BTR(int bits, OpArg dest, OpArg index); -    void BTC(int bits, OpArg dest, OpArg index); +    void BT(int bits, const OpArg& dest, const OpArg& index); +    void BTS(int bits, const OpArg& dest, const OpArg& index); +    void BTR(int bits, const OpArg& dest, const OpArg& index); +    void BTC(int bits, const OpArg& dest, const OpArg& index);      // Double-Precision Shift -    void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); -    void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); +    void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); +    void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);      // Extend EAX into EDX in various ways      void CWD(int bits = 16); -    inline void CDQ() {CWD(32);} -    inline void CQO() {CWD(64);} +    void CDQ() {CWD(32);} +    void CQO() {CWD(64);}      void CBW(int bits = 8); -    inline void CWDE() {CBW(16);} -    inline void CDQE() {CBW(32);} +    void CWDE() {CBW(16);} +    void CDQE() {CBW(32);}      // Load effective address      void LEA(int bits, X64Reg dest, OpArg src);      // Integer arithmetic -    void NEG (int bits, OpArg src); -    void ADD (int bits, const OpArg &a1, const OpArg &a2); -    void ADC (int bits, const OpArg &a1, const OpArg &a2); -    void SUB (int bits, const OpArg &a1, const OpArg &a2); -    void SBB (int bits, const OpArg &a1, const OpArg &a2); -    void AND (int bits, const OpArg &a1, const OpArg &a2); - 
   void CMP (int bits, const OpArg &a1, const OpArg &a2); +    void NEG(int bits, const OpArg& src); +    void ADD(int bits, const OpArg& a1, const OpArg& a2); +    void ADC(int bits, const OpArg& a1, const OpArg& a2); +    void SUB(int bits, const OpArg& a1, const OpArg& a2); +    void SBB(int bits, const OpArg& a1, const OpArg& a2); +    void AND(int bits, const OpArg& a1, const OpArg& a2); +    void CMP(int bits, const OpArg& a1, const OpArg& a2);      // Bit operations -    void NOT (int bits, OpArg src); -    void OR  (int bits, const OpArg &a1, const OpArg &a2); -    void XOR (int bits, const OpArg &a1, const OpArg &a2); -    void MOV (int bits, const OpArg &a1, const OpArg &a2); -    void TEST(int bits, const OpArg &a1, const OpArg &a2); +    void NOT (int bits, const OpArg& src); +    void OR(int bits, const OpArg& a1, const OpArg& a2); +    void XOR(int bits, const OpArg& a1, const OpArg& a2); +    void MOV(int bits, const OpArg& a1, const OpArg& a2); +    void TEST(int bits, const OpArg& a1, const OpArg& a2);      // Are these useful at all? Consider removing. -    void XCHG(int bits, const OpArg &a1, const OpArg &a2); +    void XCHG(int bits, const OpArg& a1, const OpArg& a2);      void XCHG_AHAL();      // Byte swapping (32 and 64-bit only). @@ -536,13 +532,13 @@ public:      void MOVBE(int dbits, const OpArg& dest, const OpArg& src);      // Available only on AMD >= Phenom or Intel >= Haswell -    void LZCNT(int bits, X64Reg dest, OpArg src); +    void LZCNT(int bits, X64Reg dest, const OpArg& src);      // Note: this one is actually part of BMI1 -    void TZCNT(int bits, X64Reg dest, OpArg src); +    void TZCNT(int bits, X64Reg dest, const OpArg& src);      // WARNING - These two take 11-13 cycles and are VectorPath! 
(AMD64) -    void STMXCSR(OpArg memloc); -    void LDMXCSR(OpArg memloc); +    void STMXCSR(const OpArg& memloc); +    void LDMXCSR(const OpArg& memloc);      // Prefixes      void LOCK(); @@ -569,259 +565,243 @@ public:          x87_FPUBusy = 0x8000,      }; -    void FLD(int bits, OpArg src); -    void FST(int bits, OpArg dest); -    void FSTP(int bits, OpArg dest); +    void FLD(int bits, const OpArg& src); +    void FST(int bits, const OpArg& dest); +    void FSTP(int bits, const OpArg& dest);      void FNSTSW_AX();      void FWAIT();      // SSE/SSE2: Floating point arithmetic -    void ADDSS(X64Reg regOp, OpArg arg); -    void ADDSD(X64Reg regOp, OpArg arg); -    void SUBSS(X64Reg regOp, OpArg arg); -    void SUBSD(X64Reg regOp, OpArg arg); -    void MULSS(X64Reg regOp, OpArg arg); -    void MULSD(X64Reg regOp, OpArg arg); -    void DIVSS(X64Reg regOp, OpArg arg); -    void DIVSD(X64Reg regOp, OpArg arg); -    void MINSS(X64Reg regOp, OpArg arg); -    void MINSD(X64Reg regOp, OpArg arg); -    void MAXSS(X64Reg regOp, OpArg arg); -    void MAXSD(X64Reg regOp, OpArg arg); -    void SQRTSS(X64Reg regOp, OpArg arg); -    void SQRTSD(X64Reg regOp, OpArg arg); -    void RSQRTSS(X64Reg regOp, OpArg arg); +    void ADDSS(X64Reg regOp, const OpArg& arg); +    void ADDSD(X64Reg regOp, const OpArg& arg); +    void SUBSS(X64Reg regOp, const OpArg& arg); +    void SUBSD(X64Reg regOp, const OpArg& arg); +    void MULSS(X64Reg regOp, const OpArg& arg); +    void MULSD(X64Reg regOp, const OpArg& arg); +    void DIVSS(X64Reg regOp, const OpArg& arg); +    void DIVSD(X64Reg regOp, const OpArg& arg); +    void MINSS(X64Reg regOp, const OpArg& arg); +    void MINSD(X64Reg regOp, const OpArg& arg); +    void MAXSS(X64Reg regOp, const OpArg& arg); +    void MAXSD(X64Reg regOp, const OpArg& arg); +    void SQRTSS(X64Reg regOp, const OpArg& arg); +    void SQRTSD(X64Reg regOp, const OpArg& arg); +    void RCPSS(X64Reg regOp, const OpArg& arg); +    void RSQRTSS(X64Reg regOp, const 
OpArg& arg);      // SSE/SSE2: Floating point bitwise (yes) -    void CMPSS(X64Reg regOp, OpArg arg, u8 compare); -    void CMPSD(X64Reg regOp, OpArg arg, u8 compare); +    void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); +    void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); -    inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } -    inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } -    inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } -    inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } -    inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } -    inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } -    inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } +    void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } +    void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } +    void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } +    void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } +    void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } +    void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } +    void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }      // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) -    void ADDPS(X64Reg regOp, OpArg arg); -    void ADDPD(X64Reg regOp, OpArg arg); -    void SUBPS(X64Reg regOp, OpArg arg); -    void SUBPD(X64Reg regOp, OpArg arg); -    void CMPPS(X64Reg regOp, OpArg arg, u8 compare); -    void CMPPD(X64Reg regOp, OpArg arg, u8 compare); -    void MULPS(X64Reg regOp, OpArg arg); -    void MULPD(X64Reg regOp, OpArg arg); -    void DIVPS(X64Reg regOp, OpArg arg); -    void DIVPD(X64Reg regOp, OpArg arg); -    void MINPS(X64Reg 
regOp, OpArg arg); -    void MINPD(X64Reg regOp, OpArg arg); -    void MAXPS(X64Reg regOp, OpArg arg); -    void MAXPD(X64Reg regOp, OpArg arg); -    void SQRTPS(X64Reg regOp, OpArg arg); -    void SQRTPD(X64Reg regOp, OpArg arg); -    void RCPPS(X64Reg regOp, OpArg arg); -    void RSQRTPS(X64Reg regOp, OpArg arg); +    void ADDPS(X64Reg regOp, const OpArg& arg); +    void ADDPD(X64Reg regOp, const OpArg& arg); +    void SUBPS(X64Reg regOp, const OpArg& arg); +    void SUBPD(X64Reg regOp, const OpArg& arg); +    void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); +    void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); +    void MULPS(X64Reg regOp, const OpArg& arg); +    void MULPD(X64Reg regOp, const OpArg& arg); +    void DIVPS(X64Reg regOp, const OpArg& arg); +    void DIVPD(X64Reg regOp, const OpArg& arg); +    void MINPS(X64Reg regOp, const OpArg& arg); +    void MINPD(X64Reg regOp, const OpArg& arg); +    void MAXPS(X64Reg regOp, const OpArg& arg); +    void MAXPD(X64Reg regOp, const OpArg& arg); +    void SQRTPS(X64Reg regOp, const OpArg& arg); +    void SQRTPD(X64Reg regOp, const OpArg& arg); +    void RCPPS(X64Reg regOp, const OpArg& arg); +    void RSQRTPS(X64Reg regOp, const OpArg& arg);      // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) -    void ANDPS(X64Reg regOp, OpArg arg); -    void ANDPD(X64Reg regOp, OpArg arg); -    void ANDNPS(X64Reg regOp, OpArg arg); -    void ANDNPD(X64Reg regOp, OpArg arg); -    void ORPS(X64Reg regOp, OpArg arg); -    void ORPD(X64Reg regOp, OpArg arg); -    void XORPS(X64Reg regOp, OpArg arg); -    void XORPD(X64Reg regOp, OpArg arg); +    void ANDPS(X64Reg regOp, const OpArg& arg); +    void ANDPD(X64Reg regOp, const OpArg& arg); +    void ANDNPS(X64Reg regOp, const OpArg& arg); +    void ANDNPD(X64Reg regOp, const OpArg& arg); +    void ORPS(X64Reg regOp, const OpArg& arg); +    void ORPD(X64Reg regOp, const OpArg& arg); +    void XORPS(X64Reg regOp, const OpArg& arg); +    void 
XORPD(X64Reg regOp, const OpArg& arg);      // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. -    void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); -    void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); +    void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); +    void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);      // SSE/SSE2: Useful alternative to shuffle in some cases. -    void MOVDDUP(X64Reg regOp, OpArg arg); - -    // TODO: Actually implement -#if 0 -    // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... -    void ADDSUBPS(X64Reg dest, OpArg src); -    void ADDSUBPD(X64Reg dest, OpArg src); -    void HADDPD(X64Reg dest, OpArg src); -    void HSUBPS(X64Reg dest, OpArg src); -    void HSUBPD(X64Reg dest, OpArg src); - -    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". -    void DPPD(X64Reg dest, OpArg src, u8 arg); - -    // These are probably useful for VFPU emulation. -    void INSERTPS(X64Reg dest, OpArg src, u8 arg); -    void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); -#endif +    void MOVDDUP(X64Reg regOp, const OpArg& arg);      // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. -    void HADDPS(X64Reg dest, OpArg src); +    void HADDPS(X64Reg dest, const OpArg& src);      // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". 
-    void DPPS(X64Reg dest, OpArg src, u8 arg); +    void DPPS(X64Reg dest, const OpArg& src, u8 arg); -    void UNPCKLPS(X64Reg dest, OpArg src); -    void UNPCKHPS(X64Reg dest, OpArg src); -    void UNPCKLPD(X64Reg dest, OpArg src); -    void UNPCKHPD(X64Reg dest, OpArg src); +    void UNPCKLPS(X64Reg dest, const OpArg& src); +    void UNPCKHPS(X64Reg dest, const OpArg& src); +    void UNPCKLPD(X64Reg dest, const OpArg& src); +    void UNPCKHPD(X64Reg dest, const OpArg& src);      // SSE/SSE2: Compares. -    void COMISS(X64Reg regOp, OpArg arg); -    void COMISD(X64Reg regOp, OpArg arg); -    void UCOMISS(X64Reg regOp, OpArg arg); -    void UCOMISD(X64Reg regOp, OpArg arg); +    void COMISS(X64Reg regOp, const OpArg& arg); +    void COMISD(X64Reg regOp, const OpArg& arg); +    void UCOMISS(X64Reg regOp, const OpArg& arg); +    void UCOMISD(X64Reg regOp, const OpArg& arg);      // SSE/SSE2: Moves. Use the right data type for your data, in most cases. -    void MOVAPS(X64Reg regOp, OpArg arg); -    void MOVAPD(X64Reg regOp, OpArg arg); -    void MOVAPS(OpArg arg, X64Reg regOp); -    void MOVAPD(OpArg arg, X64Reg regOp); - -    void MOVUPS(X64Reg regOp, OpArg arg); -    void MOVUPD(X64Reg regOp, OpArg arg); -    void MOVUPS(OpArg arg, X64Reg regOp); -    void MOVUPD(OpArg arg, X64Reg regOp); - -    void MOVDQA(X64Reg regOp, OpArg arg); -    void MOVDQA(OpArg arg, X64Reg regOp); -    void MOVDQU(X64Reg regOp, OpArg arg); -    void MOVDQU(OpArg arg, X64Reg regOp); - -    void MOVSS(X64Reg regOp, OpArg arg); -    void MOVSD(X64Reg regOp, OpArg arg); -    void MOVSS(OpArg arg, X64Reg regOp); -    void MOVSD(OpArg arg, X64Reg regOp); - -    void MOVLPS(X64Reg regOp, OpArg arg); -    void MOVLPD(X64Reg regOp, OpArg arg); -    void MOVLPS(OpArg arg, X64Reg regOp); -    void MOVLPD(OpArg arg, X64Reg regOp); - -    void MOVHPS(X64Reg regOp, OpArg arg); -    void MOVHPD(X64Reg regOp, OpArg arg); -    void MOVHPS(OpArg arg, X64Reg regOp); -    void MOVHPD(OpArg arg, X64Reg 
regOp); +    void MOVAPS(X64Reg regOp, const OpArg& arg); +    void MOVAPD(X64Reg regOp, const OpArg& arg); +    void MOVAPS(const OpArg& arg, X64Reg regOp); +    void MOVAPD(const OpArg& arg, X64Reg regOp); + +    void MOVUPS(X64Reg regOp, const OpArg& arg); +    void MOVUPD(X64Reg regOp, const OpArg& arg); +    void MOVUPS(const OpArg& arg, X64Reg regOp); +    void MOVUPD(const OpArg& arg, X64Reg regOp); + +    void MOVDQA(X64Reg regOp, const OpArg& arg); +    void MOVDQA(const OpArg& arg, X64Reg regOp); +    void MOVDQU(X64Reg regOp, const OpArg& arg); +    void MOVDQU(const OpArg& arg, X64Reg regOp); + +    void MOVSS(X64Reg regOp, const OpArg& arg); +    void MOVSD(X64Reg regOp, const OpArg& arg); +    void MOVSS(const OpArg& arg, X64Reg regOp); +    void MOVSD(const OpArg& arg, X64Reg regOp); + +    void MOVLPS(X64Reg regOp, const OpArg& arg); +    void MOVLPD(X64Reg regOp, const OpArg& arg); +    void MOVLPS(const OpArg& arg, X64Reg regOp); +    void MOVLPD(const OpArg& arg, X64Reg regOp); + +    void MOVHPS(X64Reg regOp, const OpArg& arg); +    void MOVHPD(X64Reg regOp, const OpArg& arg); +    void MOVHPS(const OpArg& arg, X64Reg regOp); +    void MOVHPD(const OpArg& arg, X64Reg regOp);      void MOVHLPS(X64Reg regOp1, X64Reg regOp2);      void MOVLHPS(X64Reg regOp1, X64Reg regOp2); -    void MOVD_xmm(X64Reg dest, const OpArg &arg); +    void MOVD_xmm(X64Reg dest, const OpArg& arg);      void MOVQ_xmm(X64Reg dest, OpArg arg); -    void MOVD_xmm(const OpArg &arg, X64Reg src); +    void MOVD_xmm(const OpArg& arg, X64Reg src);      void MOVQ_xmm(OpArg arg, X64Reg src);      // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. -    void MOVMSKPS(X64Reg dest, OpArg arg); -    void MOVMSKPD(X64Reg dest, OpArg arg); +    void MOVMSKPS(X64Reg dest, const OpArg& arg); +    void MOVMSKPD(X64Reg dest, const OpArg& arg);      // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. 
This is a weird one.      void MASKMOVDQU(X64Reg dest, X64Reg src); -    void LDDQU(X64Reg dest, OpArg src); +    void LDDQU(X64Reg dest, const OpArg& src);      // SSE/SSE2: Data type conversions. -    void CVTPS2PD(X64Reg dest, OpArg src); -    void CVTPD2PS(X64Reg dest, OpArg src); -    void CVTSS2SD(X64Reg dest, OpArg src); -    void CVTSI2SS(X64Reg dest, OpArg src); -    void CVTSD2SS(X64Reg dest, OpArg src); -    void CVTSI2SD(X64Reg dest, OpArg src); -    void CVTDQ2PD(X64Reg regOp, OpArg arg); -    void CVTPD2DQ(X64Reg regOp, OpArg arg); -    void CVTDQ2PS(X64Reg regOp, OpArg arg); -    void CVTPS2DQ(X64Reg regOp, OpArg arg); - -    void CVTTPS2DQ(X64Reg regOp, OpArg arg); -    void CVTTPD2DQ(X64Reg regOp, OpArg arg); +    void CVTPS2PD(X64Reg dest, const OpArg& src); +    void CVTPD2PS(X64Reg dest, const OpArg& src); +    void CVTSS2SD(X64Reg dest, const OpArg& src); +    void CVTSI2SS(X64Reg dest, const OpArg& src); +    void CVTSD2SS(X64Reg dest, const OpArg& src); +    void CVTSI2SD(X64Reg dest, const OpArg& src); +    void CVTDQ2PD(X64Reg regOp, const OpArg& arg); +    void CVTPD2DQ(X64Reg regOp, const OpArg& arg); +    void CVTDQ2PS(X64Reg regOp, const OpArg& arg); +    void CVTPS2DQ(X64Reg regOp, const OpArg& arg); + +    void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); +    void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);      // Destinations are X64 regs (rax, rbx, ...) for these instructions. 
-    void CVTSS2SI(X64Reg xregdest, OpArg src); -    void CVTSD2SI(X64Reg xregdest, OpArg src); -    void CVTTSS2SI(X64Reg xregdest, OpArg arg); -    void CVTTSD2SI(X64Reg xregdest, OpArg arg); +    void CVTSS2SI(X64Reg xregdest, const OpArg& src); +    void CVTSD2SI(X64Reg xregdest, const OpArg& src); +    void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); +    void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);      // SSE2: Packed integer instructions -    void PACKSSDW(X64Reg dest, OpArg arg); -    void PACKSSWB(X64Reg dest, OpArg arg); -    void PACKUSDW(X64Reg dest, OpArg arg); -    void PACKUSWB(X64Reg dest, OpArg arg); +    void PACKSSDW(X64Reg dest, const OpArg& arg); +    void PACKSSWB(X64Reg dest, const OpArg& arg); +    void PACKUSDW(X64Reg dest, const OpArg& arg); +    void PACKUSWB(X64Reg dest, const OpArg& arg);      void PUNPCKLBW(X64Reg dest, const OpArg &arg);      void PUNPCKLWD(X64Reg dest, const OpArg &arg);      void PUNPCKLDQ(X64Reg dest, const OpArg &arg);      void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); -    void PTEST(X64Reg dest, OpArg arg); -    void PAND(X64Reg dest, OpArg arg); -    void PANDN(X64Reg dest, OpArg arg); -    void PXOR(X64Reg dest, OpArg arg); -    void POR(X64Reg dest, OpArg arg); - -    void PADDB(X64Reg dest, OpArg arg); -    void PADDW(X64Reg dest, OpArg arg); -    void PADDD(X64Reg dest, OpArg arg); -    void PADDQ(X64Reg dest, OpArg arg); - -    void PADDSB(X64Reg dest, OpArg arg); -    void PADDSW(X64Reg dest, OpArg arg); -    void PADDUSB(X64Reg dest, OpArg arg); -    void PADDUSW(X64Reg dest, OpArg arg); - -    void PSUBB(X64Reg dest, OpArg arg); -    void PSUBW(X64Reg dest, OpArg arg); -    void PSUBD(X64Reg dest, OpArg arg); -    void PSUBQ(X64Reg dest, OpArg arg); - -    void PSUBSB(X64Reg dest, OpArg arg); -    void PSUBSW(X64Reg dest, OpArg arg); -    void PSUBUSB(X64Reg dest, OpArg arg); -    void PSUBUSW(X64Reg dest, OpArg arg); - -    void PAVGB(X64Reg dest, OpArg arg); -    void PAVGW(X64Reg dest, 
OpArg arg); - -    void PCMPEQB(X64Reg dest, OpArg arg); -    void PCMPEQW(X64Reg dest, OpArg arg); -    void PCMPEQD(X64Reg dest, OpArg arg); - -    void PCMPGTB(X64Reg dest, OpArg arg); -    void PCMPGTW(X64Reg dest, OpArg arg); -    void PCMPGTD(X64Reg dest, OpArg arg); - -    void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); -    void PINSRW(X64Reg dest, OpArg arg, u8 subreg); - -    void PMADDWD(X64Reg dest, OpArg arg); -    void PSADBW(X64Reg dest, OpArg arg); - -    void PMAXSW(X64Reg dest, OpArg arg); -    void PMAXUB(X64Reg dest, OpArg arg); -    void PMINSW(X64Reg dest, OpArg arg); -    void PMINUB(X64Reg dest, OpArg arg); +    void PTEST(X64Reg dest, const OpArg& arg); +    void PAND(X64Reg dest, const OpArg& arg); +    void PANDN(X64Reg dest, const OpArg& arg); +    void PXOR(X64Reg dest, const OpArg& arg); +    void POR(X64Reg dest, const OpArg& arg); + +    void PADDB(X64Reg dest, const OpArg& arg); +    void PADDW(X64Reg dest, const OpArg& arg); +    void PADDD(X64Reg dest, const OpArg& arg); +    void PADDQ(X64Reg dest, const OpArg& arg); + +    void PADDSB(X64Reg dest, const OpArg& arg); +    void PADDSW(X64Reg dest, const OpArg& arg); +    void PADDUSB(X64Reg dest, const OpArg& arg); +    void PADDUSW(X64Reg dest, const OpArg& arg); + +    void PSUBB(X64Reg dest, const OpArg& arg); +    void PSUBW(X64Reg dest, const OpArg& arg); +    void PSUBD(X64Reg dest, const OpArg& arg); +    void PSUBQ(X64Reg dest, const OpArg& arg); + +    void PSUBSB(X64Reg dest, const OpArg& arg); +    void PSUBSW(X64Reg dest, const OpArg& arg); +    void PSUBUSB(X64Reg dest, const OpArg& arg); +    void PSUBUSW(X64Reg dest, const OpArg& arg); + +    void PAVGB(X64Reg dest, const OpArg& arg); +    void PAVGW(X64Reg dest, const OpArg& arg); + +    void PCMPEQB(X64Reg dest, const OpArg& arg); +    void PCMPEQW(X64Reg dest, const OpArg& arg); +    void PCMPEQD(X64Reg dest, const OpArg& arg); + +    void PCMPGTB(X64Reg dest, const OpArg& arg); +    void PCMPGTW(X64Reg dest, 
const OpArg& arg); +    void PCMPGTD(X64Reg dest, const OpArg& arg); + +    void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); +    void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); + +    void PMADDWD(X64Reg dest, const OpArg& arg); +    void PSADBW(X64Reg dest, const OpArg& arg); + +    void PMAXSW(X64Reg dest, const OpArg& arg); +    void PMAXUB(X64Reg dest, const OpArg& arg); +    void PMINSW(X64Reg dest, const OpArg& arg); +    void PMINUB(X64Reg dest, const OpArg& arg);      // SSE4: More MAX/MIN instructions. -    void PMINSB(X64Reg dest, OpArg arg); -    void PMINSD(X64Reg dest, OpArg arg); -    void PMINUW(X64Reg dest, OpArg arg); -    void PMINUD(X64Reg dest, OpArg arg); -    void PMAXSB(X64Reg dest, OpArg arg); -    void PMAXSD(X64Reg dest, OpArg arg); -    void PMAXUW(X64Reg dest, OpArg arg); -    void PMAXUD(X64Reg dest, OpArg arg); - -    void PMOVMSKB(X64Reg dest, OpArg arg); -    void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); -    void PSHUFB(X64Reg dest, OpArg arg); - -    void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); -    void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); +    void PMINSB(X64Reg dest, const OpArg& arg); +    void PMINSD(X64Reg dest, const OpArg& arg); +    void PMINUW(X64Reg dest, const OpArg& arg); +    void PMINUD(X64Reg dest, const OpArg& arg); +    void PMAXSB(X64Reg dest, const OpArg& arg); +    void PMAXSD(X64Reg dest, const OpArg& arg); +    void PMAXUW(X64Reg dest, const OpArg& arg); +    void PMAXUD(X64Reg dest, const OpArg& arg); + +    void PMOVMSKB(X64Reg dest, const OpArg& arg); +    void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); +    void PSHUFB(X64Reg dest, const OpArg& arg); + +    void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); +    void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);      void PSRLW(X64Reg reg, int shift);      void PSRLD(X64Reg reg, int shift);      void PSRLQ(X64Reg reg, int shift); -    void PSRLQ(X64Reg reg, OpArg arg); +    void PSRLQ(X64Reg reg, const 
OpArg& arg);      void PSRLDQ(X64Reg reg, int shift);      void PSLLW(X64Reg reg, int shift); @@ -833,198 +813,198 @@ public:      void PSRAD(X64Reg reg, int shift);      // SSE4: data type conversions -    void PMOVSXBW(X64Reg dest, OpArg arg); -    void PMOVSXBD(X64Reg dest, OpArg arg); -    void PMOVSXBQ(X64Reg dest, OpArg arg); -    void PMOVSXWD(X64Reg dest, OpArg arg); -    void PMOVSXWQ(X64Reg dest, OpArg arg); -    void PMOVSXDQ(X64Reg dest, OpArg arg); -    void PMOVZXBW(X64Reg dest, OpArg arg); -    void PMOVZXBD(X64Reg dest, OpArg arg); -    void PMOVZXBQ(X64Reg dest, OpArg arg); -    void PMOVZXWD(X64Reg dest, OpArg arg); -    void PMOVZXWQ(X64Reg dest, OpArg arg); -    void PMOVZXDQ(X64Reg dest, OpArg arg); +    void PMOVSXBW(X64Reg dest, const OpArg& arg); +    void PMOVSXBD(X64Reg dest, const OpArg& arg); +    void PMOVSXBQ(X64Reg dest, const OpArg& arg); +    void PMOVSXWD(X64Reg dest, const OpArg& arg); +    void PMOVSXWQ(X64Reg dest, const OpArg& arg); +    void PMOVSXDQ(X64Reg dest, const OpArg& arg); +    void PMOVZXBW(X64Reg dest, const OpArg& arg); +    void PMOVZXBD(X64Reg dest, const OpArg& arg); +    void PMOVZXBQ(X64Reg dest, const OpArg& arg); +    void PMOVZXWD(X64Reg dest, const OpArg& arg); +    void PMOVZXWQ(X64Reg dest, const OpArg& arg); +    void PMOVZXDQ(X64Reg dest, const OpArg& arg);      // SSE4: variable blend instructions (xmm0 implicit argument) -    void PBLENDVB(X64Reg dest, OpArg arg); -    void BLENDVPS(X64Reg dest, OpArg arg); -    void BLENDVPD(X64Reg dest, OpArg arg); +    void PBLENDVB(X64Reg dest, const OpArg& arg); +    void BLENDVPS(X64Reg dest, const OpArg& arg); +    void BLENDVPD(X64Reg dest, const OpArg& arg);      void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);      void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);      // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) 
-    void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); -    void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); -    void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); -    void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); - -    inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } -    inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } -    inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } -    inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } - -    inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } -    inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } -    inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } -    inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } - -    inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } -    inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } -    inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } -    inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } - -    inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } -    inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } -    inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } -    inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } +    void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); +    void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); +    void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); +    void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); + +    void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, 
FROUND_NEAREST); } +    void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } +    void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } +    void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } + +    void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } +    void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } +    void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } +    void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } + +    void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } +    void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } +    void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } +    void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } + +    void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } +    void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } +    void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } +    void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }      // AVX -    void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 
shuffle); -    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - -    void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - -    void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); +    void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); +    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + +    void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void 
VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + +    void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);      // FMA3 -    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - 
   void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUBADD132PS(X64Reg regOp1, 
X64Reg regOp2, OpArg arg); -    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); +    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    
void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, 
const OpArg& arg); +    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);      // VEX GPR instructions -    void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); -    void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); -    void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); -    void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); -    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); -    void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); -    void BLSR(int bits, X64Reg regOp, OpArg arg); -    void BLSMSK(int bits, X64Reg regOp, OpArg arg); -    void BLSI(int bits, X64Reg regOp, OpArg arg); -    void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); -    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); +    void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); +    void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); +    void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); +    void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); +    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void MULX(int bits, 
X64Reg regOp1, X64Reg regOp2, const OpArg& arg); +    void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); +    void BLSR(int bits, X64Reg regOp, const OpArg& arg); +    void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); +    void BLSI(int bits, X64Reg regOp, const OpArg& arg); +    void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); +    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);      void RDTSC();      // Utility functions      // The difference between this and CALL is that this aligns the stack      // where appropriate. -    void ABI_CallFunction(const void *func); +    void ABI_CallFunction(const void* func);      template <typename T>      void ABI_CallFunction(T (*func)()) { -        ABI_CallFunction((const void *)func); +        ABI_CallFunction((const void*)func);      } -    void ABI_CallFunction(const u8 *func) { -        ABI_CallFunction((const void *)func); +    void ABI_CallFunction(const u8* func) { +        ABI_CallFunction((const void*)func);      } -    void ABI_CallFunctionC16(const void *func, u16 param1); -    void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); +    void ABI_CallFunctionC16(const void* func, u16 param1); +    void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);      // These only support u32 parameters, but that's enough for a lot of uses.      // These will destroy the 1 or 2 first "parameter regs". 
-    void ABI_CallFunctionC(const void *func, u32 param1); -    void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); -    void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); -    void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); -    void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); -    void ABI_CallFunctionP(const void *func, void *param1); -    void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); -    void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); -    void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); -    void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); -    void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); -    void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); -    void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); +    void ABI_CallFunctionC(const void* func, u32 param1); +    void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); +    void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3); +    void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); +    void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); +    void ABI_CallFunctionP(const void* func, void* param1); +    void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); +    void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); +    void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); +    void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); +    void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 
param3); +    void ABI_CallFunctionA(const void* func, const OpArg& arg1); +    void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);      // Pass a register as a parameter. -    void ABI_CallFunctionR(const void *func, X64Reg reg1); -    void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); +    void ABI_CallFunctionR(const void* func, X64Reg reg1); +    void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);      template <typename Tr, typename T1>      void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { -        ABI_CallFunctionC((const void *)func, param1); +        ABI_CallFunctionC((const void*)func, param1);      }      // A function that doesn't have any control over what it will do to regs, @@ -1048,9 +1028,9 @@ public:      void ABI_EmitEpilogue(int maxCallParams);      #ifdef _M_IX86 -    inline int ABI_GetNumXMMRegs() { return 8; } +    static int ABI_GetNumXMMRegs() { return 8; }      #else -    inline int ABI_GetNumXMMRegs() { return 16; } +    static int ABI_GetNumXMMRegs() { return 16; }      #endif  };  // class XEmitter diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6cc60fd58..c17290b9b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -29,6 +29,7 @@ set(SRCS              hle/kernel/address_arbiter.cpp              hle/kernel/event.cpp              hle/kernel/kernel.cpp +            hle/kernel/memory.cpp              hle/kernel/mutex.cpp              hle/kernel/process.cpp              hle/kernel/resource_limit.cpp @@ -115,7 +116,6 @@ set(SRCS              loader/loader.cpp              loader/ncch.cpp              tracer/recorder.cpp -            mem_map.cpp              memory.cpp              settings.cpp              system.cpp @@ -157,6 +157,7 @@ set(HEADERS              hle/kernel/address_arbiter.h              hle/kernel/event.h              hle/kernel/kernel.h +            hle/kernel/memory.h              hle/kernel/mutex.h              
hle/kernel/process.h              hle/kernel/resource_limit.h @@ -245,7 +246,6 @@ set(HEADERS              loader/ncch.h              tracer/recorder.h              tracer/citrace.h -            mem_map.h              memory.h              memory_setup.h              settings.h diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp index ccb2eb0eb..0491717dc 100644 --- a/src/core/arm/skyeye_common/armstate.cpp +++ b/src/core/arm/skyeye_common/armstate.cpp @@ -4,7 +4,6 @@  #include "common/swap.h"  #include "common/logging/log.h" -#include "core/mem_map.h"  #include "core/memory.h"  #include "core/arm/skyeye_common/armstate.h"  #include "core/arm/skyeye_common/vfp/vfp.h" diff --git a/src/core/arm/skyeye_common/armsupp.cpp b/src/core/arm/skyeye_common/armsupp.cpp index d31fb9449..883713e86 100644 --- a/src/core/arm/skyeye_common/armsupp.cpp +++ b/src/core/arm/skyeye_common/armsupp.cpp @@ -17,7 +17,6 @@  #include "common/logging/log.h" -#include "core/mem_map.h"  #include "core/arm/skyeye_common/arm_regformat.h"  #include "core/arm/skyeye_common/armstate.h"  #include "core/arm/skyeye_common/armsupp.h" diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp index aea936d2d..b1a72dc0c 100644 --- a/src/core/hle/config_mem.cpp +++ b/src/core/hle/config_mem.cpp @@ -25,10 +25,6 @@ void Init() {      config_mem.sys_core_ver = 0x2;      config_mem.unit_info = 0x1; // Bit 0 set for Retail      config_mem.prev_firm = 0; -    config_mem.app_mem_type = 0x2; // Default app mem type is 0 -    config_mem.app_mem_alloc = 0x06000000; // Set to 96MB, since some games use more than the default (64MB) -    config_mem.base_mem_alloc = 0x01400000; // Default base memory is 20MB -    config_mem.sys_mem_alloc = Memory::FCRAM_SIZE - (config_mem.app_mem_alloc + config_mem.base_mem_alloc);      config_mem.firm_unk = 0;      config_mem.firm_version_rev = 0;      config_mem.firm_version_min = 0x40; @@ -36,7 +32,4 @@ void Init() {      
config_mem.firm_sys_core_ver = 0x2;  } -void Shutdown() { -} -  } // namespace diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h index 9825a09e8..24a1254f2 100644 --- a/src/core/hle/config_mem.h +++ b/src/core/hle/config_mem.h @@ -52,6 +52,5 @@ static_assert(sizeof(ConfigMemDef) == Memory::CONFIG_MEMORY_SIZE, "Config Memory  extern ConfigMemDef config_mem;  void Init(); -void Shutdown();  } // namespace diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h index 1a0518926..5846a161b 100644 --- a/src/core/hle/function_wrappers.h +++ b/src/core/hle/function_wrappers.h @@ -172,6 +172,14 @@ template<ResultCode func(u32, s64, s64)> void Wrap() {      FuncReturn(func(PARAM(0), param1, param2).raw);  } +template<ResultCode func(s64*, Handle, u32)> void Wrap() { +    s64 param_1 = 0; +    u32 retval = func(&param_1, PARAM(1), PARAM(2)).raw; +    Core::g_app_core->SetReg(1, (u32)param_1); +    Core::g_app_core->SetReg(2, (u32)(param_1 >> 32)); +    FuncReturn(retval); +} +  ////////////////////////////////////////////////////////////////////////////////////////////////////  // Function wrappers that return type u32 diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index cd0a400dc..331b1b22a 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp @@ -34,8 +34,6 @@ void Reschedule(const char *reason) {  void Init() {      Service::Init(); -    ConfigMem::Init(); -    SharedPage::Init();      g_reschedule = false; @@ -43,8 +41,6 @@ void Init() {  }  void Shutdown() { -    ConfigMem::Shutdown(); -    SharedPage::Shutdown();      Service::Shutdown();      LOG_DEBUG(Kernel, "shutdown OK"); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 5711c0405..7a401a965 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -7,11 +7,14 @@  #include "common/assert.h"  #include "common/logging/log.h" +#include "core/hle/config_mem.h"  #include "core/hle/kernel/kernel.h" -#include 
"core/hle/kernel/resource_limit.h" +#include "core/hle/kernel/memory.h"  #include "core/hle/kernel/process.h" +#include "core/hle/kernel/resource_limit.h"  #include "core/hle/kernel/thread.h"  #include "core/hle/kernel/timer.h" +#include "core/hle/shared_page.h"  namespace Kernel { @@ -119,6 +122,13 @@ void HandleTable::Clear() {  /// Initialize the kernel  void Init() { +    ConfigMem::Init(); +    SharedPage::Init(); + +    // TODO(yuriks): The memory type parameter needs to be determined by the ExHeader field instead +    // For now it defaults to the one with a largest allocation to the app +    Kernel::MemoryInit(2); // Allocates 96MB to the application +      Kernel::ResourceLimitsInit();      Kernel::ThreadingInit();      Kernel::TimersInit(); @@ -131,11 +141,14 @@ void Init() {  /// Shutdown the kernel  void Shutdown() { +    g_handle_table.Clear(); // Free all kernel objects +      Kernel::ThreadingShutdown(); +    g_current_process = nullptr; +      Kernel::TimersShutdown();      Kernel::ResourceLimitsShutdown(); -    g_handle_table.Clear(); // Free all kernel objects -    g_current_process = nullptr; +    Kernel::MemoryShutdown();  }  } // namespace diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp new file mode 100644 index 000000000..e4fc5f3c4 --- /dev/null +++ b/src/core/hle/kernel/memory.cpp @@ -0,0 +1,136 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <map> +#include <memory> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "common/logging/log.h" + +#include "core/hle/config_mem.h" +#include "core/hle/kernel/memory.h" +#include "core/hle/kernel/vm_manager.h" +#include "core/hle/result.h" +#include "core/hle/shared_page.h" +#include "core/memory.h" +#include "core/memory_setup.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Kernel { + +static MemoryRegionInfo memory_regions[3]; + +/// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system +/// memory configuration type. +static const u32 memory_region_sizes[8][3] = { +    // Old 3DS layouts +    {0x04000000, 0x02C00000, 0x01400000}, // 0 +    { /* This appears to be unused. */ }, // 1 +    {0x06000000, 0x00C00000, 0x01400000}, // 2 +    {0x05000000, 0x01C00000, 0x01400000}, // 3 +    {0x04800000, 0x02400000, 0x01400000}, // 4 +    {0x02000000, 0x04C00000, 0x01400000}, // 5 + +    // New 3DS layouts +    {0x07C00000, 0x06400000, 0x02000000}, // 6 +    {0x0B200000, 0x02E00000, 0x02000000}, // 7 +}; + +void MemoryInit(u32 mem_type) { +    // TODO(yuriks): On the n3DS, all o3DS configurations (<=5) are forced to 6 instead. +    ASSERT_MSG(mem_type <= 5, "New 3DS memory configuration aren't supported yet!"); +    ASSERT(mem_type != 1); + +    // The kernel allocation regions (APPLICATION, SYSTEM and BASE) are laid out in sequence, with +    // the sizes specified in the memory_region_sizes table. 
+    VAddr base = 0; +    for (int i = 0; i < 3; ++i) { +        memory_regions[i].base = base; +        memory_regions[i].size = memory_region_sizes[mem_type][i]; +        memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); + +        base += memory_regions[i].size; +    } + +    // We must've allocated the entire FCRAM by the end +    ASSERT(base == Memory::FCRAM_SIZE); + +    using ConfigMem::config_mem; +    config_mem.app_mem_type = mem_type; +    // app_mem_malloc does not always match the configured size for memory_region[0]: in case the +    // n3DS type override is in effect it reports the size the game expects, not the real one. +    config_mem.app_mem_alloc = memory_region_sizes[mem_type][0]; +    config_mem.sys_mem_alloc = memory_regions[1].size; +    config_mem.base_mem_alloc = memory_regions[2].size; +} + +void MemoryShutdown() { +    for (auto& region : memory_regions) { +        region.base = 0; +        region.size = 0; +        region.linear_heap_memory = nullptr; +    } +} + +MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) { +    switch (region) { +    case MemoryRegion::APPLICATION: +        return &memory_regions[0]; +    case MemoryRegion::SYSTEM: +        return &memory_regions[1]; +    case MemoryRegion::BASE: +        return &memory_regions[2]; +    default: +        UNREACHABLE(); +    } +} + +} + +namespace Memory { + +namespace { + +struct MemoryArea { +    u32 base; +    u32 size; +    const char* name; +}; + +// We don't declare the IO regions in here since its handled by other means. 
+static MemoryArea memory_areas[] = { +    {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE,     "Shared Memory"}, // Shared memory +    {VRAM_VADDR,          VRAM_SIZE,              "VRAM"},          // Video memory (VRAM) +    {DSP_RAM_VADDR,       DSP_RAM_SIZE,           "DSP RAM"},       // DSP memory +    {TLS_AREA_VADDR,      TLS_AREA_SIZE,          "TLS Area"},      // TLS memory +}; + +} + +void Init() { +    InitMemoryMap(); +    LOG_DEBUG(HW_Memory, "initialized OK"); +} + +void InitLegacyAddressSpace(Kernel::VMManager& address_space) { +    using namespace Kernel; + +    for (MemoryArea& area : memory_areas) { +        auto block = std::make_shared<std::vector<u8>>(area.size); +        address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap(); +    } + +    auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR, +            (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom(); +    address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); + +    auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, +            (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); +    address_space.Reprotect(shared_page_vma, VMAPermission::Read); +} + +} // namespace diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h new file mode 100644 index 000000000..36690b091 --- /dev/null +++ b/src/core/hle/kernel/memory.h @@ -0,0 +1,35 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <memory> + +#include "common/common_types.h" + +#include "core/hle/kernel/process.h" + +namespace Kernel { + +class VMManager; + +struct MemoryRegionInfo { +    u32 base; // Not an address, but offset from start of FCRAM +    u32 size; + +    std::shared_ptr<std::vector<u8>> linear_heap_memory; +}; + +void MemoryInit(u32 mem_type); +void MemoryShutdown(); +MemoryRegionInfo* GetMemoryRegion(MemoryRegion region); + +} + +namespace Memory { + +void Init(); +void InitLegacyAddressSpace(Kernel::VMManager& address_space); + +} // namespace diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index a7892c652..124047a53 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -7,11 +7,11 @@  #include "common/logging/log.h"  #include "common/make_unique.h" +#include "core/hle/kernel/memory.h"  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/resource_limit.h"  #include "core/hle/kernel/thread.h"  #include "core/hle/kernel/vm_manager.h" -#include "core/mem_map.h"  #include "core/memory.h"  namespace Kernel { @@ -36,8 +36,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) {      process->codeset = std::move(code_set);      process->flags.raw = 0;      process->flags.memory_region = MemoryRegion::APPLICATION; -    process->address_space = Common::make_unique<VMManager>(); -    Memory::InitLegacyAddressSpace(*process->address_space); +    Memory::InitLegacyAddressSpace(process->vm_manager);      return process;  } @@ -93,9 +92,11 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {              mapping.unk_flag = false;          } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF              // Kernel version -            int minor = descriptor & 0xFF; -            int major = (descriptor >> 8) & 0xFF; -            LOG_INFO(Loader, "ExHeader kernel version ignored: %d.%d", major, minor); +            kernel_version = descriptor & 0xFFFF; + +         
   int minor = kernel_version & 0xFF; +            int major = (kernel_version >> 8) & 0xFF; +            LOG_INFO(Loader, "ExHeader kernel version: %d.%d", major, minor);          } else {              LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor);          } @@ -103,20 +104,153 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {  }  void Process::Run(s32 main_thread_priority, u32 stack_size) { +    memory_region = GetMemoryRegion(flags.memory_region); +      auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) { -        auto vma = address_space->MapMemoryBlock(segment.addr, codeset->memory, +        auto vma = vm_manager.MapMemoryBlock(segment.addr, codeset->memory,                  segment.offset, segment.size, memory_state).Unwrap(); -        address_space->Reprotect(vma, permissions); +        vm_manager.Reprotect(vma, permissions); +        misc_memory_used += segment.size;      }; +    // Map CodeSet segments      MapSegment(codeset->code,   VMAPermission::ReadExecute, MemoryState::Code);      MapSegment(codeset->rodata, VMAPermission::Read,        MemoryState::Code);      MapSegment(codeset->data,   VMAPermission::ReadWrite,   MemoryState::Private); -    address_space->LogLayout(); +    // Allocate and map stack +    vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size, +            std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked +            ).Unwrap(); +    misc_memory_used += stack_size; + +    vm_manager.LogLayout(Log::Level::Debug);      Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority);  } +VAddr Process::GetLinearHeapBase() const { +    return (kernel_version < 0x22C ? 
Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_SIZE) +            + memory_region->base; +} + +VAddr Process::GetLinearHeapLimit() const { +    return GetLinearHeapBase() + memory_region->size; +} + +ResultVal<VAddr> Process::HeapAllocate(VAddr target, u32 size, VMAPermission perms) { +    if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) { +        return ERR_INVALID_ADDRESS; +    } + +    if (heap_memory == nullptr) { +        // Initialize heap +        heap_memory = std::make_shared<std::vector<u8>>(); +        heap_start = heap_end = target; +    } + +    // If necessary, expand backing vector to cover new heap extents. +    if (target < heap_start) { +        heap_memory->insert(begin(*heap_memory), heap_start - target, 0); +        heap_start = target; +        vm_manager.RefreshMemoryBlockMappings(heap_memory.get()); +    } +    if (target + size > heap_end) { +        heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0); +        heap_end = target + size; +        vm_manager.RefreshMemoryBlockMappings(heap_memory.get()); +    } +    ASSERT(heap_end - heap_start == heap_memory->size()); + +    CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, heap_memory, target - heap_start, size, MemoryState::Private)); +    vm_manager.Reprotect(vma, perms); + +    heap_used += size; + +    return MakeResult<VAddr>(heap_end - size); +} + +ResultCode Process::HeapFree(VAddr target, u32 size) { +    if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) { +        return ERR_INVALID_ADDRESS; +    } + +    ResultCode result = vm_manager.UnmapRange(target, size); +    if (result.IsError()) return result; + +    heap_used -= size; + +    return RESULT_SUCCESS; +} + +ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission perms) { +    auto& linheap_memory = memory_region->linear_heap_memory; + +    VAddr heap_end = 
GetLinearHeapBase() + (u32)linheap_memory->size(); +    // Games and homebrew only ever seem to pass 0 here (which lets the kernel decide the address), +    // but explicit addresses are also accepted and respected. +    if (target == 0) { +        target = heap_end; +    } + +    if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() || +        target > heap_end || target + size < target) { + +        return ERR_INVALID_ADDRESS; +    } + +    // Expansion of the linear heap is only allowed if you do an allocation immediately at its +    // end. It's possible to free gaps in the middle of the heap and then reallocate them later, +    // but expansions are only allowed at the end. +    if (target == heap_end) { +        linheap_memory->insert(linheap_memory->end(), size, 0); +        vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); +    } + +    // TODO(yuriks): As is, this lets processes map memory allocated by other processes from the +    // same region. It is unknown if or how the 3DS kernel checks against this. 
+    size_t offset = target - GetLinearHeapBase(); +    CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, linheap_memory, offset, size, MemoryState::Continuous)); +    vm_manager.Reprotect(vma, perms); + +    linear_heap_used += size; + +    return MakeResult<VAddr>(target); +} + +ResultCode Process::LinearFree(VAddr target, u32 size) { +    auto& linheap_memory = memory_region->linear_heap_memory; + +    if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() || +        target + size < target) { + +        return ERR_INVALID_ADDRESS; +    } + +    VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size(); +    if (target + size > heap_end) { +        return ERR_INVALID_ADDRESS_STATE; +    } + +    ResultCode result = vm_manager.UnmapRange(target, size); +    if (result.IsError()) return result; + +    linear_heap_used -= size; + +    if (target + size == heap_end) { +        // End of linear heap has been freed, so check what's the last allocated block in it and +        // reduce the size. 
+        auto vma = vm_manager.FindVMA(target); +        ASSERT(vma != vm_manager.vma_map.end()); +        ASSERT(vma->second.type == VMAType::Free); +        VAddr new_end = vma->second.base; +        if (new_end >= GetLinearHeapBase()) { +            linheap_memory->resize(new_end - GetLinearHeapBase()); +        } +    } + +    return RESULT_SUCCESS; +} +  Kernel::Process::Process() {}  Kernel::Process::~Process() {} diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 83d3aceae..60e17f251 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -15,6 +15,7 @@  #include "common/common_types.h"  #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/vm_manager.h"  namespace Kernel { @@ -48,7 +49,7 @@ union ProcessFlags {  };  class ResourceLimit; -class VMManager; +struct MemoryRegionInfo;  struct CodeSet final : public Object {      static SharedPtr<CodeSet> Create(std::string name, u64 program_id); @@ -104,14 +105,12 @@ public:      /// processes access to specific I/O regions and device memory.      boost::container::static_vector<AddressMapping, 8> address_mappings;      ProcessFlags flags; +    /// Kernel compatibility version for this process +    u16 kernel_version = 0;      /// The id of this process      u32 process_id = next_process_id++; -    /// Bitmask of the used TLS slots -    std::bitset<300> used_tls_slots; -    std::unique_ptr<VMManager> address_space; -      /**       * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them       * to this process. @@ -123,6 +122,36 @@ public:       */      void Run(s32 main_thread_priority, u32 stack_size); + +    /////////////////////////////////////////////////////////////////////////////////////////////// +    // Memory Management + +    VMManager vm_manager; + +    // Memory used to back the allocations in the regular heap. 
A single vector is used to cover +    // the entire virtual address space extents that bound the allocations, including any holes. +    // This makes deallocation and reallocation of holes fast and keeps process memory contiguous +    // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. +    std::shared_ptr<std::vector<u8>> heap_memory; +    // The left/right bounds of the address space covered by heap_memory. +    VAddr heap_start = 0, heap_end = 0; + +    u32 heap_used = 0, linear_heap_used = 0, misc_memory_used = 0; + +    MemoryRegionInfo* memory_region = nullptr; + +    /// Bitmask of the used TLS slots +    std::bitset<300> used_tls_slots; + +    VAddr GetLinearHeapBase() const; +    VAddr GetLinearHeapLimit() const; + +    ResultVal<VAddr> HeapAllocate(VAddr target, u32 size, VMAPermission perms); +    ResultCode HeapFree(VAddr target, u32 size); + +    ResultVal<VAddr> LinearAllocate(VAddr target, u32 size, VMAPermission perms); +    ResultCode LinearFree(VAddr target, u32 size); +  private:      Process();      ~Process() override; diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index 94b3e3298..67dde08c2 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp @@ -6,7 +6,6 @@  #include "common/logging/log.h" -#include "core/mem_map.h"  #include "core/hle/kernel/resource_limit.h"  namespace Kernel { diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 29ea6d531..c10126513 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -117,6 +117,7 @@ void Thread::Stop() {      wait_objects.clear();      Kernel::g_current_process->used_tls_slots[tls_index] = false; +    g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE;      HLE::Reschedule(__func__);  } @@ -414,6 +415,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,      }      
ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); +    g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE;      // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used      // to initialize the context @@ -504,7 +506,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {  }  VAddr Thread::GetTLSAddress() const { -    return Memory::TLS_AREA_VADDR + tls_index * 0x200; +    return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;  }  //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 205cc7b53..2610acf76 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -11,6 +11,15 @@  namespace Kernel { +static const char* GetMemoryStateName(MemoryState state) { +    static const char* names[] = { +        "Free", "Reserved", "IO", "Static", "Code", "Private", "Shared", "Continuous", "Aliased", +        "Alias", "AliasCode", "Locked", +    }; + +    return names[(int)state]; +} +  bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {      ASSERT(base + size == next.base);      if (permissions != next.permissions || @@ -51,11 +60,15 @@ void VMManager::Reset() {  }  VMManager::VMAHandle VMManager::FindVMA(VAddr target) const { -    return std::prev(vma_map.upper_bound(target)); +    if (target >= MAX_ADDRESS) { +        return vma_map.end(); +    } else { +        return std::prev(vma_map.upper_bound(target)); +    }  }  ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, -        std::shared_ptr<std::vector<u8>> block, u32 offset, u32 size, MemoryState state) { +        std::shared_ptr<std::vector<u8>> block, size_t offset, u32 size, MemoryState state) {      ASSERT(block != nullptr);      ASSERT(offset + size <= block->size()); @@ -106,10 +119,8 @@ ResultVal<VMManager::VMAHandle> 
VMManager::MapMMIO(VAddr target, PAddr paddr, u3      return MakeResult<VMAHandle>(MergeAdjacent(vma_handle));  } -void VMManager::Unmap(VMAHandle vma_handle) { -    VMAIter iter = StripIterConstness(vma_handle); - -    VirtualMemoryArea& vma = iter->second; +VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) { +    VirtualMemoryArea& vma = vma_handle->second;      vma.type = VMAType::Free;      vma.permissions = VMAPermission::None;      vma.meminfo_state = MemoryState::Free; @@ -121,26 +132,67 @@ void VMManager::Unmap(VMAHandle vma_handle) {      UpdatePageTableForVMA(vma); -    MergeAdjacent(iter); +    return MergeAdjacent(vma_handle); +} + +ResultCode VMManager::UnmapRange(VAddr target, u32 size) { +    CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); +    VAddr target_end = target + size; + +    VMAIter end = vma_map.end(); +    // The comparison against the end of the range must be done using addresses since VMAs can be +    // merged during this process, causing invalidation of the iterators. 
+    while (vma != end && vma->second.base < target_end) { +        vma = std::next(Unmap(vma)); +    } + +    ASSERT(FindVMA(target)->second.size >= size); +    return RESULT_SUCCESS;  } -void VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { +VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) {      VMAIter iter = StripIterConstness(vma_handle);      VirtualMemoryArea& vma = iter->second;      vma.permissions = new_perms;      UpdatePageTableForVMA(vma); -    MergeAdjacent(iter); +    return MergeAdjacent(iter); +} + +ResultCode VMManager::ReprotectRange(VAddr target, u32 size, VMAPermission new_perms) { +    CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); +    VAddr target_end = target + size; + +    VMAIter end = vma_map.end(); +    // The comparison against the end of the range must be done using addresses since VMAs can be +    // merged during this process, causing invalidation of the iterators. +    while (vma != end && vma->second.base < target_end) { +        vma = std::next(StripIterConstness(Reprotect(vma, new_perms))); +    } + +    return RESULT_SUCCESS;  } -void VMManager::LogLayout() const { +void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { +    // If this ever proves to have a noticeable performance impact, allow users of the function to +    // specify a specific range of addresses to limit the scan to.      
for (const auto& p : vma_map) {          const VirtualMemoryArea& vma = p.second; -        LOG_DEBUG(Kernel, "%08X - %08X  size: %8X %c%c%c", vma.base, vma.base + vma.size, vma.size, +        if (block == vma.backing_block.get()) { +            UpdatePageTableForVMA(vma); +        } +    } +} + +void VMManager::LogLayout(Log::Level log_level) const { +    for (const auto& p : vma_map) { +        const VirtualMemoryArea& vma = p.second; +        LOG_GENERIC(Log::Class::Kernel, log_level, "%08X - %08X  size: %8X %c%c%c %s", +            vma.base, vma.base + vma.size, vma.size,              (u8)vma.permissions & (u8)VMAPermission::Read    ? 'R' : '-',              (u8)vma.permissions & (u8)VMAPermission::Write   ? 'W' : '-', -            (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-'); +            (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', GetMemoryStateName(vma.meminfo_state));      }  } @@ -151,21 +203,19 @@ VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle & iter) {  }  ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { -    ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: %8X", size); -    ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: %08X", base); +    ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size); +    ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", base);      VMAIter vma_handle = StripIterConstness(FindVMA(base));      if (vma_handle == vma_map.end()) {          // Target address is outside the range managed by the kernel -        return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, -                ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E01BF5 +        return ERR_INVALID_ADDRESS;      }      VirtualMemoryArea& vma = vma_handle->second;      if (vma.type != VMAType::Free) {          // Region is already allocated -        return 
ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, -                ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5 +        return ERR_INVALID_ADDRESS_STATE;      }      u32 start_in_vma = base - vma.base; @@ -173,8 +223,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {      if (end_in_vma > vma.size) {          // Requested allocation doesn't fit inside VMA -        return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, -                ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5 +        return ERR_INVALID_ADDRESS_STATE;      }      if (end_in_vma != vma.size) { @@ -189,6 +238,35 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {      return MakeResult<VMAIter>(vma_handle);  } +ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u32 size) { +    ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size); +    ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", target); + +    VAddr target_end = target + size; +    ASSERT(target_end >= target); +    ASSERT(target_end <= MAX_ADDRESS); +    ASSERT(size > 0); + +    VMAIter begin_vma = StripIterConstness(FindVMA(target)); +    VMAIter i_end = vma_map.lower_bound(target_end); +    for (auto i = begin_vma; i != i_end; ++i) { +        if (i->second.type == VMAType::Free) { +            return ERR_INVALID_ADDRESS_STATE; +        } +    } + +    if (target != begin_vma->second.base) { +        begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); +    } + +    VMAIter end_vma = StripIterConstness(FindVMA(target_end)); +    if (end_vma != vma_map.end() && target_end != end_vma->second.base) { +        end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); +    } + +    return MakeResult<VMAIter>(begin_vma); +} +  VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) {      VirtualMemoryArea& old_vma = 
vma_handle->second;      VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index b3795a94a..4e95f1f0c 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -14,6 +14,14 @@  namespace Kernel { +const ResultCode ERR_INVALID_ADDRESS{ // 0xE0E01BF5 +        ErrorDescription::InvalidAddress, ErrorModule::OS, +        ErrorSummary::InvalidArgument, ErrorLevel::Usage}; + +const ResultCode ERR_INVALID_ADDRESS_STATE{ // 0xE0A01BF5 +        ErrorDescription::InvalidAddress, ErrorModule::OS, +        ErrorSummary::InvalidState, ErrorLevel::Usage}; +  enum class VMAType : u8 {      /// VMA represents an unmapped region of the address space.      Free, @@ -75,7 +83,7 @@ struct VirtualMemoryArea {      /// Memory block backing this VMA.      std::shared_ptr<std::vector<u8>> backing_block = nullptr;      /// Offset into the backing_memory the mapping starts from. -    u32 offset = 0; +    size_t offset = 0;      // Settings for type = BackingMemory      /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed. @@ -141,7 +149,7 @@ public:       * @param state MemoryState tag to attach to the VMA.       */      ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, -            u32 offset, u32 size, MemoryState state); +            size_t offset, u32 size, MemoryState state);      /**       * Maps an unmanaged host memory pointer at a given address. @@ -163,14 +171,23 @@ public:       */      ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state); -    /// Unmaps the given VMA. -    void Unmap(VMAHandle vma); +    /// Unmaps a range of addresses, splitting VMAs as necessary. +    ResultCode UnmapRange(VAddr target, u32 size);      /// Changes the permissions of the given VMA. 
-    void Reprotect(VMAHandle vma, VMAPermission new_perms); +    VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms); + +    /// Changes the permissions of a range of addresses, splitting VMAs as necessary. +    ResultCode ReprotectRange(VAddr target, u32 size, VMAPermission new_perms); + +    /** +     * Scans all VMAs and updates the page table range of any that use the given vector as backing +     * memory. This should be called after any operation that causes reallocation of the vector. +     */ +    void RefreshMemoryBlockMappings(const std::vector<u8>* block);      /// Dumps the address space layout to the log, for debugging -    void LogLayout() const; +    void LogLayout(Log::Level log_level) const;  private:      using VMAIter = decltype(vma_map)::iterator; @@ -178,6 +195,9 @@ private:      /// Converts a VMAHandle to a mutable VMAIter.      VMAIter StripIterConstness(const VMAHandle& iter); +    /// Unmaps the given VMA. +    VMAIter Unmap(VMAIter vma); +      /**       * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing       * the appropriate error checking. @@ -185,6 +205,12 @@ private:      ResultVal<VMAIter> CarveVMA(VAddr base, u32 size);      /** +     * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each +     * end of the range. +     */ +    ResultVal<VMAIter> CarveVMARange(VAddr base, u32 size); + +    /**       * Splits a VMA in two, at the specified offset.       * @returns the right side of the split, with the original iterator becoming the left side.       
*/ diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp index 35402341b..6a2fdea2b 100644 --- a/src/core/hle/service/apt/apt.cpp +++ b/src/core/hle/service/apt/apt.cpp @@ -16,6 +16,7 @@  #include "core/hle/hle.h"  #include "core/hle/kernel/event.h"  #include "core/hle/kernel/mutex.h" +#include "core/hle/kernel/process.h"  #include "core/hle/kernel/shared_memory.h"  #include "core/hle/kernel/thread.h" @@ -37,7 +38,7 @@ static Kernel::SharedPtr<Kernel::Mutex> lock;  static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event  static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event -static std::vector<u8> shared_font; +static std::shared_ptr<std::vector<u8>> shared_font;  static u32 cpu_percent; ///< CPU time available to the running application @@ -74,11 +75,12 @@ void Initialize(Service::Interface* self) {  void GetSharedFont(Service::Interface* self) {      u32* cmd_buff = Kernel::GetCommandBuffer(); -    if (!shared_font.empty()) { -        // TODO(bunnei): This function shouldn't copy the shared font every time it's called. -        // Instead, it should probably map the shared font as RO memory. We don't currently have -        // an easy way to do this, but the copy should be sufficient for now. -        memcpy(Memory::GetPointer(SHARED_FONT_VADDR), shared_font.data(), shared_font.size()); +    if (shared_font != nullptr) { +        // TODO(yuriks): This is a hack to keep this working right now even with our completely +        // broken shared memory system. 
+        shared_font_mem->base_address = SHARED_FONT_VADDR; +        Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->base_address, +                shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared);          cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);          cmd_buff[1] = RESULT_SUCCESS.raw; // No error @@ -391,7 +393,6 @@ void Init() {      // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file      // "shared_font.bin" in the Citra "sysdata" directory. -    shared_font.clear();      std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT;      FileUtil::CreateFullPath(filepath); // Create path if not already created @@ -399,8 +400,8 @@ void Init() {      if (file.IsOpen()) {          // Read shared font data -        shared_font.resize((size_t)file.GetSize()); -        file.ReadBytes(shared_font.data(), (size_t)file.GetSize()); +        shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize()); +        file.ReadBytes(shared_font->data(), shared_font->size());          // Create shared font memory object          using Kernel::MemoryPermission; @@ -424,7 +425,7 @@ void Init() {  }  void Shutdown() { -    shared_font.clear(); +    shared_font = nullptr;      shared_font_mem = nullptr;      lock = nullptr;      notification_event = nullptr; diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index e93c1b436..fde508a13 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -4,7 +4,6 @@  #include "common/bit_field.h" -#include "core/mem_map.h"  #include "core/memory.h"  #include "core/hle/kernel/event.h"  #include "core/hle/kernel/shared_memory.h" @@ -418,7 +417,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {      case CommandId::SET_DISPLAY_TRANSFER:      { -        auto& params = command.image_copy; +        auto& params = command.display_transfer;          
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),                  Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);          WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), @@ -433,17 +432,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {      // TODO: Check if texture copies are implemented correctly..      case CommandId::SET_TEXTURE_COPY:      { -        auto& params = command.image_copy; -        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), +        auto& params = command.texture_copy; +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),                  Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); -        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),                  Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); -        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); -        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); -        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); - -        // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? 
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), +                params.size); +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), +                params.in_width_gap); +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), +                params.out_width_gap); +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), +                params.flags); + +        // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter. +        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);          break;      } diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h index c89d0a467..8bcb30ad1 100644 --- a/src/core/hle/service/gsp_gpu.h +++ b/src/core/hle/service/gsp_gpu.h @@ -127,7 +127,16 @@ struct Command {              u32 in_buffer_size;              u32 out_buffer_size;              u32 flags; -        } image_copy; +        } display_transfer; + +        struct { +            u32 in_buffer_address; +            u32 out_buffer_address; +            u32 size; +            u32 in_width_gap; +            u32 out_width_gap; +            u32 flags; +        } texture_copy;          u8 raw_data[0x1C];      }; diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp index 6e7dafaad..6b1b71fe4 100644 --- a/src/core/hle/service/y2r_u.cpp +++ b/src/core/hle/service/y2r_u.cpp @@ -10,7 +10,6 @@  #include "core/hle/kernel/event.h"  #include "core/hle/service/y2r_u.h"  #include "core/hw/y2r.h" -#include "core/mem_map.h"  #include "video_core/renderer_base.h"  #include "video_core/utils.h" diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp index 26d87c7e2..50c5bc01b 100644 --- a/src/core/hle/shared_page.cpp +++ 
b/src/core/hle/shared_page.cpp @@ -18,7 +18,4 @@ void Init() {      shared_page.running_hw = 0x1; // product  } -void Shutdown() { -} -  } // namespace diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h index db6a5340b..379bb7b63 100644 --- a/src/core/hle/shared_page.h +++ b/src/core/hle/shared_page.h @@ -54,6 +54,5 @@ static_assert(sizeof(SharedPageDef) == Memory::SHARED_PAGE_SIZE, "Shared page st  extern SharedPageDef shared_page;  void Init(); -void Shutdown();  } // namespace diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index bb64fdfb7..89ac45a6f 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -10,11 +10,11 @@  #include "common/symbols.h"  #include "core/core_timing.h" -#include "core/mem_map.h"  #include "core/arm/arm_interface.h"  #include "core/hle/kernel/address_arbiter.h"  #include "core/hle/kernel/event.h" +#include "core/hle/kernel/memory.h"  #include "core/hle/kernel/mutex.h"  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/resource_limit.h" @@ -41,32 +41,114 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel,  const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS,          ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E +const ResultCode ERR_MISALIGNED_ADDRESS{ // 0xE0E01BF1 +        ErrorDescription::MisalignedAddress, ErrorModule::OS, +        ErrorSummary::InvalidArgument, ErrorLevel::Usage}; +const ResultCode ERR_MISALIGNED_SIZE{ // 0xE0E01BF2 +        ErrorDescription::MisalignedSize, ErrorModule::OS, +        ErrorSummary::InvalidArgument, ErrorLevel::Usage}; +const ResultCode ERR_INVALID_COMBINATION{ // 0xE0E01BEE +        ErrorDescription::InvalidCombination, ErrorModule::OS, +        ErrorSummary::InvalidArgument, ErrorLevel::Usage}; +  enum ControlMemoryOperation { -    MEMORY_OPERATION_HEAP       = 0x00000003, -    MEMORY_OPERATION_GSP_HEAP   = 0x00010003, +    MEMOP_FREE    = 1, +    MEMOP_RESERVE = 2, // 
This operation seems to be unsupported in the kernel +    MEMOP_COMMIT  = 3, +    MEMOP_MAP     = 4, +    MEMOP_UNMAP   = 5, +    MEMOP_PROTECT = 6, +    MEMOP_OPERATION_MASK = 0xFF, + +    MEMOP_REGION_APP    = 0x100, +    MEMOP_REGION_SYSTEM = 0x200, +    MEMOP_REGION_BASE   = 0x300, +    MEMOP_REGION_MASK   = 0xF00, + +    MEMOP_LINEAR = 0x10000,  };  /// Map application or GSP heap memory  static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) { -    LOG_TRACE(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X", +    using namespace Kernel; + +    LOG_DEBUG(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=0x%X, permissions=0x%08X",          operation, addr0, addr1, size, permissions); -    switch (operation) { +    if ((addr0 & Memory::PAGE_MASK) != 0 || (addr1 & Memory::PAGE_MASK) != 0) { +        return ERR_MISALIGNED_ADDRESS; +    } +    if ((size & Memory::PAGE_MASK) != 0) { +        return ERR_MISALIGNED_SIZE; +    } + +    u32 region = operation & MEMOP_REGION_MASK; +    operation &= ~MEMOP_REGION_MASK; + +    if (region != 0) { +        LOG_WARNING(Kernel_SVC, "ControlMemory with specified region not supported, region=%X", region); +    } + +    if ((permissions & (u32)MemoryPermission::ReadWrite) != permissions) { +        return ERR_INVALID_COMBINATION; +    } +    VMAPermission vma_permissions = (VMAPermission)permissions; + +    auto& process = *g_current_process; + +    switch (operation & MEMOP_OPERATION_MASK) { +    case MEMOP_FREE: +    { +        if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { +            ResultCode result = process.HeapFree(addr0, size); +            if (result.IsError()) return result; +        } else if (addr0 >= process.GetLinearHeapBase() && addr0 < process.GetLinearHeapLimit()) { +            ResultCode result = process.LinearFree(addr0, size); +            if (result.IsError()) 
return result; +        } else { +            return ERR_INVALID_ADDRESS; +        } +        *out_addr = addr0; +        break; +    } + +    case MEMOP_COMMIT: +    { +        if (operation & MEMOP_LINEAR) { +            CASCADE_RESULT(*out_addr, process.LinearAllocate(addr0, size, vma_permissions)); +        } else { +            CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions)); +        } +        break; +    } -    // Map normal heap memory -    case MEMORY_OPERATION_HEAP: -        *out_addr = Memory::MapBlock_Heap(size, operation, permissions); +    case MEMOP_MAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented +    { +        CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));          break; +    } + +    case MEMOP_UNMAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented +    { +        ResultCode result = process.HeapFree(addr0, size); +        if (result.IsError()) return result; +        break; +    } -    // Map GSP heap memory -    case MEMORY_OPERATION_GSP_HEAP: -        *out_addr = Memory::MapBlock_HeapLinear(size, operation, permissions); +    case MEMOP_PROTECT: +    { +        ResultCode result = process.vm_manager.ReprotectRange(addr0, size, vma_permissions); +        if (result.IsError()) return result;          break; +    } -    // Unknown ControlMemory operation      default:          LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation); +        return ERR_INVALID_COMBINATION;      } + +    process.vm_manager.LogLayout(Log::Level::Trace); +      return RESULT_SUCCESS;  } @@ -537,9 +619,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* page_inf      if (process == nullptr)          return ERR_INVALID_HANDLE; -    auto vma = process->address_space->FindVMA(addr); +    auto vma = process->vm_manager.FindVMA(addr); -    if (vma == process->address_space->vma_map.end()) +    if 
(vma == Kernel::g_current_process->vm_manager.vma_map.end())          return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);      memory_info->base_address = vma->second.base; @@ -692,6 +774,52 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32      return RESULT_SUCCESS;  } +static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) { +    LOG_TRACE(Kernel_SVC, "called process=0x%08X type=%u", process_handle, type); + +    using Kernel::Process; +    Kernel::SharedPtr<Process> process = Kernel::g_handle_table.Get<Process>(process_handle); +    if (process == nullptr) +        return ERR_INVALID_HANDLE; + +    switch (type) { +    case 0: +    case 2: +        // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure +        // what's the difference between them. +        *out = process->heap_used + process->linear_heap_used + process->misc_memory_used; +        break; +    case 1: +    case 3: +    case 4: +    case 5: +    case 6: +    case 7: +    case 8: +        // These are valid, but not implemented yet +        LOG_ERROR(Kernel_SVC, "unimplemented GetProcessInfo type=%u", type); +        break; +    case 20: +        *out = Memory::FCRAM_PADDR - process->GetLinearHeapBase(); +        break; +    default: +        LOG_ERROR(Kernel_SVC, "unknown GetProcessInfo type=%u", type); + +        if (type >= 21 && type <= 23) { +            return ResultCode( // 0xE0E01BF4 +                    ErrorDescription::NotImplemented, ErrorModule::OS, +                    ErrorSummary::InvalidArgument, ErrorLevel::Usage); +        } else { +            return ResultCode( // 0xD8E007ED +                    ErrorDescription::InvalidEnumValue, ErrorModule::Kernel, +                    ErrorSummary::InvalidArgument, ErrorLevel::Permanent); +        } +        break; +    } + +    return RESULT_SUCCESS; +} +  namespace {      struct 
FunctionDef {          using Func = void(); @@ -746,7 +874,7 @@ static const FunctionDef SVC_Table[] = {      {0x28, HLE::Wrap<GetSystemTick>,        "GetSystemTick"},      {0x29, nullptr,                         "GetHandleInfo"},      {0x2A, nullptr,                         "GetSystemInfo"}, -    {0x2B, nullptr,                         "GetProcessInfo"}, +    {0x2B, HLE::Wrap<GetProcessInfo>,       "GetProcessInfo"},      {0x2C, nullptr,                         "GetThreadInfo"},      {0x2D, HLE::Wrap<ConnectToPort>,        "ConnectToPort"},      {0x2E, nullptr,                         "SendSyncRequest1"}, diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 3ccbc03b2..68ae38289 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  #include <cstring> +#include <numeric>  #include <type_traits>  #include "common/color.h" @@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) {              u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());              u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); +            if (config.is_texture_copy) { +                u32 input_width = config.texture_copy.input_width * 16; +                u32 input_gap = config.texture_copy.input_gap * 16; +                u32 output_width = config.texture_copy.output_width * 16; +                u32 output_gap = config.texture_copy.output_gap * 16; + +                size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); +                VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size); + +                u32 remaining_size = config.texture_copy.size; +                u32 remaining_input = input_width; +                u32 remaining_output = output_width; +                while (remaining_size > 0) { +                    u32 copy_size = 
std::min({ remaining_input, remaining_output, remaining_size }); + +                    std::memcpy(dst_pointer, src_pointer, copy_size); +                    src_pointer += copy_size; +                    dst_pointer += copy_size; + +                    remaining_input -= copy_size; +                    remaining_output -= copy_size; +                    remaining_size -= copy_size; + +                    if (remaining_input == 0) { +                        remaining_input = input_width; +                        src_pointer += input_gap; +                    } +                    if (remaining_output == 0) { +                        remaining_output = output_width; +                        dst_pointer += output_gap; +                    } +                } + +                LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", +                    config.texture_copy.size, +                    config.GetPhysicalInputAddress(), input_width, input_gap, +                    config.GetPhysicalOutputAddress(), output_width, output_gap, +                    config.flags); + +                size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); +                VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size); + +                GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); +                break; +            } +              if (config.scaling > config.ScaleXY) {                  LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());                  UNIMPLEMENTED();                  break;              } -            if (config.output_tiled && -                    (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { +            if (config.input_linear && config.scaling != config.NoScale) {                  LOG_CRITICAL(HW_GPU, "Scaling is only 
implemented on tiled input");                  UNIMPLEMENTED();                  break; @@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) {              VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); -            if (config.raw_copy) { -                // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions -                // TODO(Subv): Verify if raw copies perform scaling -                memcpy(dst_pointer, src_pointer, output_size); - -                LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", -                    output_size, -                    config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), -                    config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), -                    config.output_format.Value(), config.flags); - -                GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); - -                VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); -                break; -            } -              for (u32 y = 0; y < output_height; ++y) {                  for (u32 x = 0; x < output_width; ++x) {                      Math::Vec4<u8> src_color; @@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) {                      u32 src_offset;                      u32 dst_offset; -                    if (config.output_tiled) { +                    if (config.input_linear) {                          if (!config.dont_swizzle) {                              // Interpret the input as linear and the output as tiled                              u32 coarse_y = y & ~7; diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index daad506fe..2e3a9f779 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -201,12 +201,14 @@ 
struct Regs {              u32 flags;              BitField< 0, 1, u32> flip_vertically;  // flips input data vertically -            BitField< 1, 1, u32> output_tiled;     // Converts from linear to tiled format -            BitField< 3, 1, u32> raw_copy;         // Copies the data without performing any processing +            BitField< 1, 1, u32> input_linear;     // Converts from linear to tiled format +            BitField< 2, 1, u32> crop_input_lines; +            BitField< 3, 1, u32> is_texture_copy;  // Copies the data without performing any processing and respecting texture copy fields              BitField< 5, 1, u32> dont_swizzle;              BitField< 8, 3, PixelFormat> input_format;              BitField<12, 3, PixelFormat> output_format; - +            /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. +            BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented              BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer          }; @@ -214,10 +216,30 @@ struct Regs {          // it seems that writing to this field triggers the display transfer          u32 trigger; + +        INSERT_PADDING_WORDS(0x1); + +        struct { +            u32 size; + +            union { +                u32 input_size; + +                BitField< 0, 16, u32> input_width; +                BitField<16, 16, u32> input_gap; +            }; + +            union { +                u32 output_size; + +                BitField< 0, 16, u32> output_width; +                BitField<16, 16, u32> output_gap; +            }; +        } texture_copy;      } display_transfer_config; -    ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); +    ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); -    INSERT_PADDING_WORDS(0x331); +    INSERT_PADDING_WORDS(0x32D);      struct {          // command list size (in bytes) diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp deleted file mode 100644 index 
cbe993fbe..000000000 --- a/src/core/mem_map.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <map> -#include <memory> -#include <utility> -#include <vector> - -#include "common/common_types.h" -#include "common/logging/log.h" - -#include "core/hle/config_mem.h" -#include "core/hle/kernel/vm_manager.h" -#include "core/hle/result.h" -#include "core/hle/shared_page.h" -#include "core/mem_map.h" -#include "core/memory.h" -#include "core/memory_setup.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace Memory { - -namespace { - -struct MemoryArea { -    u32 base; -    u32 size; -    const char* name; -}; - -// We don't declare the IO regions in here since its handled by other means. -static MemoryArea memory_areas[] = { -    {HEAP_VADDR,          HEAP_SIZE,              "Heap"},          // Application heap (main memory) -    {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE,     "Shared Memory"}, // Shared memory -    {LINEAR_HEAP_VADDR,   LINEAR_HEAP_SIZE,       "Linear Heap"},   // Linear heap (main memory) -    {VRAM_VADDR,          VRAM_SIZE,              "VRAM"},          // Video memory (VRAM) -    {DSP_RAM_VADDR,       DSP_RAM_SIZE,           "DSP RAM"},       // DSP memory -    {TLS_AREA_VADDR,      TLS_AREA_SIZE,          "TLS Area"},      // TLS memory -}; - -/// Represents a block of memory mapped by ControlMemory/MapMemoryBlock -struct MemoryBlock { -    MemoryBlock() : handle(0), base_address(0), address(0), size(0), operation(0), permissions(0) { -    } -    u32 handle; -    u32 base_address; -    u32 address; -    u32 size; -    u32 operation; -    u32 permissions; - -    const u32 GetVirtualAddress() const{ -        return base_address + address; -    } -}; - -static std::map<u32, MemoryBlock> heap_map; -static std::map<u32, MemoryBlock> heap_linear_map; - -} - -u32 
MapBlock_Heap(u32 size, u32 operation, u32 permissions) { -    MemoryBlock block; - -    block.base_address  = HEAP_VADDR; -    block.size          = size; -    block.operation     = operation; -    block.permissions   = permissions; - -    if (heap_map.size() > 0) { -        const MemoryBlock last_block = heap_map.rbegin()->second; -        block.address = last_block.address + last_block.size; -    } -    heap_map[block.GetVirtualAddress()] = block; - -    return block.GetVirtualAddress(); -} - -u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions) { -    MemoryBlock block; - -    block.base_address  = LINEAR_HEAP_VADDR; -    block.size          = size; -    block.operation     = operation; -    block.permissions   = permissions; - -    if (heap_linear_map.size() > 0) { -        const MemoryBlock last_block = heap_linear_map.rbegin()->second; -        block.address = last_block.address + last_block.size; -    } -    heap_linear_map[block.GetVirtualAddress()] = block; - -    return block.GetVirtualAddress(); -} - -PAddr VirtualToPhysicalAddress(const VAddr addr) { -    if (addr == 0) { -        return 0; -    } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) { -        return addr - VRAM_VADDR + VRAM_PADDR; -    } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { -        return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; -    } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { -        return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; -    } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { -        return addr - IO_AREA_VADDR + IO_AREA_PADDR; -    } - -    LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08x", addr); -    // To help with debugging, set bit on address so that it's obviously invalid. 
-    return addr | 0x80000000; -} - -VAddr PhysicalToVirtualAddress(const PAddr addr) { -    if (addr == 0) { -        return 0; -    } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) { -        return addr - VRAM_PADDR + VRAM_VADDR; -    } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { -        return addr - FCRAM_PADDR + LINEAR_HEAP_VADDR; -    } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) { -        return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; -    } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { -        return addr - IO_AREA_PADDR + IO_AREA_VADDR; -    } - -    LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08x", addr); -    // To help with debugging, set bit on address so that it's obviously invalid. -    return addr | 0x80000000; -} - -void Init() { -    InitMemoryMap(); -    LOG_DEBUG(HW_Memory, "initialized OK"); -} - -void InitLegacyAddressSpace(Kernel::VMManager& address_space) { -    using namespace Kernel; - -    for (MemoryArea& area : memory_areas) { -        auto block = std::make_shared<std::vector<u8>>(area.size); -        address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap(); -    } - -    auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR, -            (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom(); -    address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); - -    auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, -            (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); -    address_space.Reprotect(shared_page_vma, VMAPermission::Read); -} - -void Shutdown() { -    heap_map.clear(); -    heap_linear_map.clear(); - -    LOG_DEBUG(HW_Memory, "shutdown OK"); -} - -} // namespace diff --git a/src/core/mem_map.h b/src/core/mem_map.h deleted file mode 100644 index 229ef82c5..000000000 --- a/src/core/mem_map.h +++ /dev/null @@ -1,46 
+0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Kernel { -class VMManager; -} - -namespace Memory { - -void Init(); -void InitLegacyAddressSpace(Kernel::VMManager& address_space); -void Shutdown(); - -/** - * Maps a block of memory on the heap - * @param size Size of block in bytes - * @param operation Memory map operation type - * @param permissions Memory allocation permissions - */ -u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions); - -/** - * Maps a block of memory on the GSP heap - * @param size Size of block in bytes - * @param operation Memory map operation type - * @param permissions Control memory permissions - */ -u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions); - -/** - * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical - * address. This should be used by services to translate addresses for use by the hardware. - */ -PAddr VirtualToPhysicalAddress(VAddr addr); - -/** - * Undoes a mapping performed by VirtualToPhysicalAddress(). 
- */ -VAddr PhysicalToVirtualAddress(PAddr addr); - -} // namespace diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 1f66bb27d..cde390b8a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -9,7 +9,7 @@  #include "common/logging/log.h"  #include "common/swap.h" -#include "core/mem_map.h" +#include "core/hle/kernel/process.h"  #include "core/memory.h"  #include "core/memory_setup.h" @@ -198,4 +198,42 @@ void WriteBlock(const VAddr addr, const u8* data, const size_t size) {          Write8(addr + offset, data[offset]);  } +PAddr VirtualToPhysicalAddress(const VAddr addr) { +    if (addr == 0) { +        return 0; +    } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) { +        return addr - VRAM_VADDR + VRAM_PADDR; +    } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { +        return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; +    } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { +        return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; +    } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { +        return addr - IO_AREA_VADDR + IO_AREA_PADDR; +    } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) { +        return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR; +    } + +    LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr); +    // To help with debugging, set bit on address so that it's obviously invalid. 
+    return addr | 0x80000000; +} + +VAddr PhysicalToVirtualAddress(const PAddr addr) { +    if (addr == 0) { +        return 0; +    } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) { +        return addr - VRAM_PADDR + VRAM_VADDR; +    } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { +        return addr - FCRAM_PADDR + Kernel::g_current_process->GetLinearHeapBase(); +    } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) { +        return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; +    } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { +        return addr - IO_AREA_PADDR + IO_AREA_VADDR; +    } + +    LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr); +    // To help with debugging, set bit on address so that it's obviously invalid. +    return addr | 0x80000000; +} +  } // namespace diff --git a/src/core/memory.h b/src/core/memory.h index 418609de0..5af72b7a7 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -15,6 +15,8 @@ namespace Memory {   * be mapped.   */  const u32 PAGE_SIZE = 0x1000; +const u32 PAGE_MASK = PAGE_SIZE - 1; +const int PAGE_BITS = 12;  /// Physical memory regions as seen from the ARM11  enum : PAddr { @@ -103,8 +105,15 @@ enum : VAddr {      // hardcoded value.      /// Area where TLS (Thread-Local Storage) buffers are allocated.      TLS_AREA_VADDR     = 0x1FF82000, -    TLS_AREA_SIZE      = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads +    TLS_ENTRY_SIZE     = 0x200, +    TLS_AREA_SIZE      = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size      TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, + + +    /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. 
+    NEW_LINEAR_HEAP_VADDR     = 0x30000000, +    NEW_LINEAR_HEAP_SIZE      = 0x10000000, +    NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE,  };  u8 Read8(VAddr addr); @@ -122,6 +131,17 @@ void WriteBlock(VAddr addr, const u8* data, size_t size);  u8* GetPointer(VAddr virtual_address);  /** +* Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical +* address. This should be used by services to translate addresses for use by the hardware. +*/ +PAddr VirtualToPhysicalAddress(VAddr addr); + +/** +* Undoes a mapping performed by VirtualToPhysicalAddress(). +*/ +VAddr PhysicalToVirtualAddress(PAddr addr); + +/**   * Gets a pointer to the memory region beginning at the specified physical address.   *   * @note This is currently implemented using PhysicalToVirtualAddress(). diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h index 361bfc816..84ff30120 100644 --- a/src/core/memory_setup.h +++ b/src/core/memory_setup.h @@ -10,9 +10,6 @@  namespace Memory { -const u32 PAGE_MASK = PAGE_SIZE - 1; -const int PAGE_BITS = 12; -  void InitMemoryMap();  /** diff --git a/src/core/system.cpp b/src/core/system.cpp index 561ff82f0..3cd84bf5e 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -4,11 +4,11 @@  #include "core/core.h"  #include "core/core_timing.h" -#include "core/mem_map.h"  #include "core/system.h"  #include "core/hw/hw.h"  #include "core/hle/hle.h"  #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory.h"  #include "video_core/video_core.h" @@ -29,7 +29,6 @@ void Shutdown() {      HLE::Shutdown();      Kernel::Shutdown();      HW::Shutdown(); -    Memory::Shutdown();      CoreTiming::Shutdown();      Core::Shutdown();  } diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index e14de0768..ae5a30441 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -197,12 
+197,19 @@ void RunInterpreter(UnitState<Debug>& state) {              case OpCode::Id::DP3:              case OpCode::Id::DP4: +            case OpCode::Id::DPH: +            case OpCode::Id::DPHI:              {                  Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);                  Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);                  Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + +                OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); +                if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) +                    src1[3] = float24::FromFloat32(1.0f); +                  float24 dot = float24::FromFloat32(0.f); -                int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; +                int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;                  for (int i = 0; i < num_components; ++i)                      dot = dot + src1[i] * src2[i]; @@ -221,13 +228,12 @@ void RunInterpreter(UnitState<Debug>& state) {              {                  Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);                  Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); +                float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());                  for (int i = 0; i < 4; ++i) {                      if (!swizzle.DestComponentEnabled(i))                          continue; -                    // TODO: Be stable against division by zero! -                    // TODO: I think this might be wrong... 
we should only use one component here -                    dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); +                    dest[i] = rcp_res;                  }                  Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);                  break; @@ -238,13 +244,12 @@ void RunInterpreter(UnitState<Debug>& state) {              {                  Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);                  Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); +                float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));                  for (int i = 0; i < 4; ++i) {                      if (!swizzle.DestComponentEnabled(i))                          continue; -                    // TODO: Be stable against division by zero! -                    // TODO: I think this might be wrong... we should only use one component here -                    dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); +                    dest[i] = rsq_res;                  }                  Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);                  break; @@ -278,6 +283,20 @@ void RunInterpreter(UnitState<Debug>& state) {                  break;              } +            case OpCode::Id::SGE: +            case OpCode::Id::SGEI: +                Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); +                Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); +                Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); +                for (int i = 0; i < 4; ++i) { +                    if (!swizzle.DestComponentEnabled(i)) +                        continue; + +                    dest[i] = (src1[i] >= src2[i]) ? 
float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); +                } +                Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); +                break; +              case OpCode::Id::SLT:              case OpCode::Id::SLTI:                  Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); @@ -334,6 +353,42 @@ void RunInterpreter(UnitState<Debug>& state) {                  Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);                  break; +            case OpCode::Id::EX2: +            { +                Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); +                Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + +                // EX2 only takes first component exp2 and writes it to all dest components +                float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); +                for (int i = 0; i < 4; ++i) { +                    if (!swizzle.DestComponentEnabled(i)) +                        continue; + +                    dest[i] = ex2_res; +                } + +                Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); +                break; +            } + +            case OpCode::Id::LG2: +            { +                Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); +                Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + +                // LG2 only takes the first component log2 and writes it to all dest components +                float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); +                for (int i = 0; i < 4; ++i) { +                    if (!swizzle.DestComponentEnabled(i)) +                        continue; + +                    dest[i] = lg2_res; +                } + +                Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); +                break; +            } +              default:        
          LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",                            (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 836942c6b..cc66fc8d6 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -23,14 +23,14 @@ const JitFunction instr_table[64] = {      &JitCompiler::Compile_ADD,      // add      &JitCompiler::Compile_DP3,      // dp3      &JitCompiler::Compile_DP4,      // dp4 -    nullptr,                        // dph +    &JitCompiler::Compile_DPH,      // dph      nullptr,                        // unknown -    nullptr,                        // ex2 -    nullptr,                        // lg2 +    &JitCompiler::Compile_EX2,      // ex2 +    &JitCompiler::Compile_LG2,      // lg2      nullptr,                        // unknown      &JitCompiler::Compile_MUL,      // mul -    nullptr,                        // lge -    nullptr,                        // slt +    &JitCompiler::Compile_SGE,      // sge +    &JitCompiler::Compile_SLT,      // slt      &JitCompiler::Compile_FLR,      // flr      &JitCompiler::Compile_MAX,      // max      &JitCompiler::Compile_MIN,      // min @@ -44,10 +44,10 @@ const JitFunction instr_table[64] = {      nullptr,                        // unknown      nullptr,                        // unknown      nullptr,                        // unknown -    nullptr,                        // dphi +    &JitCompiler::Compile_DPH,      // dphi      nullptr,                        // unknown -    nullptr,                        // sgei -    &JitCompiler::Compile_SLTI,     // slti +    &JitCompiler::Compile_SGE,      // sgei +    &JitCompiler::Compile_SLT,      // slti      nullptr,                        // unknown      nullptr,                        // unknown      nullptr,                        // unknown @@ 
-280,6 +280,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {      CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));  } +void JitCompiler::Compile_PushCallerSavedXMM() { +#ifndef _WIN32 +    SUB(64, R(RSP), Imm8(2 * 16)); +    MOVUPS(MDisp(RSP, 16), ONE); +    MOVUPS(MDisp(RSP, 0), NEGBIT); +#endif +} + +void JitCompiler::Compile_PopCallerSavedXMM() { +#ifndef _WIN32 +    MOVUPS(NEGBIT, MDisp(RSP, 0)); +    MOVUPS(ONE, MDisp(RSP, 16)); +    ADD(64, R(RSP), Imm8(2 * 16)); +#endif +} +  void JitCompiler::Compile_ADD(Instruction instr) {      Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);      Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -331,6 +347,71 @@ void JitCompiler::Compile_DP4(Instruction instr) {      Compile_DestEnable(instr, SRC1);  } +void JitCompiler::Compile_DPH(Instruction instr) { +    if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { +        Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); +        Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); +    } else { +        Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); +        Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); +    } + +    if (Common::GetCPUCaps().sse4_1) { +        // Set 4th component to 1.0 +        BLENDPS(SRC1, R(ONE), 0x8); // 0b1000 +        DPPS(SRC1, R(SRC2), 0xff); +    } else { +        // Reverse to set the 4th component to 1.0 +        SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); +        MOVSS(SRC1, R(ONE)); +        SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); + +        MULPS(SRC1, R(SRC2)); + +        MOVAPS(SRC2, R(SRC1)); +        SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY +        ADDPS(SRC1, R(SRC2)); + +        MOVAPS(SRC2, R(SRC1)); +        SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX +        ADDPS(SRC1, R(SRC2)); +    } + +    Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_EX2(Instruction instr) { +    
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); +    MOVSS(XMM0, R(SRC1)); + +    // The following will actually break the stack alignment +    ABI_PushAllCallerSavedRegsAndAdjustStack(); +    Compile_PushCallerSavedXMM(); +    ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); +    Compile_PopCallerSavedXMM(); +    ABI_PopAllCallerSavedRegsAndAdjustStack(); + +    SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); +    MOVAPS(SRC1, R(XMM0)); +    Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_LG2(Instruction instr) { +    Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); +    MOVSS(XMM0, R(SRC1)); + +    // The following will actually break the stack alignment +    ABI_PushAllCallerSavedRegsAndAdjustStack(); +    Compile_PushCallerSavedXMM(); +    ABI_CallFunction(reinterpret_cast<const void*>(log2f)); +    Compile_PopCallerSavedXMM(); +    ABI_PopAllCallerSavedRegsAndAdjustStack(); + +    SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); +    MOVAPS(SRC1, R(XMM0)); +    Compile_DestEnable(instr, SRC1); +} +  void JitCompiler::Compile_MUL(Instruction instr) {      Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);      Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -338,6 +419,36 @@ void JitCompiler::Compile_MUL(Instruction instr) {      Compile_DestEnable(instr, SRC1);  } +void JitCompiler::Compile_SGE(Instruction instr) { +    if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { +        Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); +        Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); +    } else { +        Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); +        Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); +    } + +    CMPPS(SRC1, R(SRC2), CMP_NLT); +    ANDPS(SRC1, R(ONE)); + +    Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_SLT(Instruction instr) { +    if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { +        
Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); +        Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); +    } else { +        Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); +        Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); +    } + +    CMPPS(SRC1, R(SRC2), CMP_LT); +    ANDPS(SRC1, R(ONE)); + +    Compile_DestEnable(instr, SRC1); +} +  void JitCompiler::Compile_FLR(Instruction instr) {      Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); @@ -415,22 +526,13 @@ void JitCompiler::Compile_MOV(Instruction instr) {      Compile_DestEnable(instr, SRC1);  } -void JitCompiler::Compile_SLTI(Instruction instr) { -    Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); -    Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2); - -    CMPSS(SRC1, R(SRC2), CMP_LT); -    ANDPS(SRC1, R(ONE)); - -    Compile_DestEnable(instr, SRC1); -} -  void JitCompiler::Compile_RCP(Instruction instr) {      Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); -    // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica +    // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica      // performs this operation more accurately. This should be checked on hardware. -    RCPPS(SRC1, R(SRC1)); +    RCPSS(SRC1, R(SRC1)); +    SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX      Compile_DestEnable(instr, SRC1);  } @@ -438,9 +540,10 @@ void JitCompiler::Compile_RCP(Instruction instr) {  void JitCompiler::Compile_RSQ(Instruction instr) {      Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); -    // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica +    // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica      // performs this operation more accurately. This should be checked on hardware. 
-    RSQRTPS(SRC1, R(SRC1)); +    RSQRTSS(SRC1, R(SRC1)); +    SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX      Compile_DestEnable(instr, SRC1);  } @@ -646,12 +749,12 @@ CompiledShader* JitCompiler::Compile() {      // Used to set a register to one      static const __m128 one = { 1.f, 1.f, 1.f, 1.f };      MOV(PTRBITS, R(RAX), ImmPtr(&one)); -    MOVAPS(ONE, MDisp(RAX, 0)); +    MOVAPS(ONE, MatR(RAX));      // Used to negate registers      static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };      MOV(PTRBITS, R(RAX), ImmPtr(&neg)); -    MOVAPS(NEGBIT, MDisp(RAX, 0)); +    MOVAPS(NEGBIT, MatR(RAX));      looping = false; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b88f2a0d2..fbe19fe93 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -37,7 +37,12 @@ public:      void Compile_ADD(Instruction instr);      void Compile_DP3(Instruction instr);      void Compile_DP4(Instruction instr); +    void Compile_DPH(Instruction instr); +    void Compile_EX2(Instruction instr); +    void Compile_LG2(Instruction instr);      void Compile_MUL(Instruction instr); +    void Compile_SGE(Instruction instr); +    void Compile_SLT(Instruction instr);      void Compile_FLR(Instruction instr);      void Compile_MAX(Instruction instr);      void Compile_MIN(Instruction instr); @@ -45,7 +50,6 @@ public:      void Compile_RSQ(Instruction instr);      void Compile_MOVA(Instruction instr);      void Compile_MOV(Instruction instr); -    void Compile_SLTI(Instruction instr);      void Compile_NOP(Instruction instr);      void Compile_END(Instruction instr);      void Compile_CALL(Instruction instr); @@ -67,6 +71,9 @@ private:      void Compile_EvaluateCondition(Instruction instr);      void Compile_UniformCondition(Instruction instr); +    void Compile_PushCallerSavedXMM(); +    void Compile_PopCallerSavedXMM(); +      /// Pointer to the variable that stores the current 
Pica code offset. Used to handle nested code blocks.      unsigned* offset_ptr = nullptr;  | 
