summaryrefslogtreecommitdiff
path: root/src/common/x64
diff options
context:
space:
mode:
authorVolcaEM <63682805+VolcaEM@users.noreply.github.com>2020-06-25 23:34:37 +0200
committerGitHub <noreply@github.com>2020-06-25 23:34:37 +0200
commit0f4512291a0504b32fac248f73a68fec34f657fe (patch)
tree3c69736a2ac82a9a0076ec3b79673c814e5f1abd /src/common/x64
parenta46df409397855812812e83ae3ed6ce6261b72cb (diff)
parenta980b4cbc16cf1efad077053e1bf2bbb53c3d60a (diff)
Merge branch 'master' into quickstart-faq
Diffstat (limited to 'src/common/x64')
-rw-r--r--src/common/x64/cpu_detect.cpp5
-rw-r--r--src/common/x64/cpu_detect.h1
-rw-r--r--src/common/x64/xbyak_abi.h95
3 files changed, 35 insertions, 66 deletions
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..f35dcb498 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -110,6 +110,11 @@ static CPUCaps Detect() {
caps.bmi1 = true;
if ((cpu_id[1] >> 8) & 1)
caps.bmi2 = true;
+ // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
+ if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
+ (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
+ caps.avx512 = caps.avx2;
+ }
}
}
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..7606c3f7b 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -19,6 +19,7 @@ struct CPUCaps {
bool lzcnt;
bool avx;
bool avx2;
+ bool avx512;
bool bmi1;
bool bmi2;
bool fma;
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 794da8a52..a5f5d4fc1 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@
namespace Common::X64 {
-inline int RegToIndex(const Xbyak::Reg& reg) {
+inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
using Kind = Xbyak::Reg::Kind;
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
"RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
}
-inline Xbyak::Reg64 IndexToReg64(int reg_index) {
+inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
ASSERT(reg_index < 16);
- return Xbyak::Reg64(reg_index);
+ return Xbyak::Reg64(static_cast<int>(reg_index));
}
-inline Xbyak::Xmm IndexToXmm(int reg_index) {
+inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
ASSERT(reg_index >= 16 && reg_index < 32);
- return Xbyak::Xmm(reg_index - 16);
+ return Xbyak::Xmm(static_cast<int>(reg_index - 16));
}
-inline Xbyak::Reg IndexToReg(int reg_index) {
+inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
if (reg_index < 16) {
return IndexToReg64(reg_index);
} else {
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
#endif
-inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
- size_t needed_frame_size, s32* out_subtraction,
- s32* out_xmm_offset) {
+struct ABIFrameInfo {
+ s32 subtraction;
+ s32 xmm_offset;
+};
+
+inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+ size_t needed_frame_size) {
const auto count = (regs & ABI_ALL_GPRS).count();
rsp_alignment -= count * 8;
size_t subtraction = 0;
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
rsp_alignment -= subtraction;
subtraction += rsp_alignment & 0xF;
- *out_subtraction = (s32)subtraction;
- *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
+ return ABIFrameInfo{static_cast<s32>(subtraction),
+ static_cast<s32>(subtraction - xmm_base_subtraction)};
}
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
size_t rsp_alignment, size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+
for (std::size_t i = 0; i < regs.size(); ++i) {
if (regs[i] && ABI_ALL_GPRS[i]) {
- code.push(IndexToReg64(static_cast<int>(i)));
+ code.push(IndexToReg64(i));
}
}
- if (subtraction != 0) {
- code.sub(code.rsp, subtraction);
- }
- for (int i = 0; i < regs.count(); i++) {
- if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
- code.push(IndexToReg64(i));
- }
+ if (frame_info.subtraction != 0) {
+ code.sub(code.rsp, frame_info.subtraction);
}
for (std::size_t i = 0; i < regs.size(); ++i) {
if (regs[i] && ABI_ALL_XMMS[i]) {
- code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
- xmm_offset += 0x10;
+ code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
+ frame_info.xmm_offset += 0x10;
}
}
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
size_t rsp_alignment, size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
for (std::size_t i = 0; i < regs.size(); ++i) {
if (regs[i] && ABI_ALL_XMMS[i]) {
- code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
- xmm_offset += 0x10;
+ code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
+ frame_info.xmm_offset += 0x10;
}
}
- if (subtraction != 0) {
- code.add(code.rsp, subtraction);
+ if (frame_info.subtraction != 0) {
+ code.add(code.rsp, frame_info.subtraction);
}
// GPRs need to be popped in reverse order
- for (int i = 15; i >= 0; i--) {
- if (regs[i]) {
- code.pop(IndexToReg64(i));
- }
- }
-}
-
-inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
- size_t rsp_alignment,
- size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
-
- for (std::size_t i = 0; i < regs.size(); ++i) {
+ for (std::size_t j = 0; j < regs.size(); ++j) {
+ const std::size_t i = regs.size() - j - 1;
if (regs[i] && ABI_ALL_GPRS[i]) {
- code.push(IndexToReg64(static_cast<int>(i)));
- }
- }
-
- if (subtraction != 0) {
- code.sub(code.rsp, subtraction);
- }
-
- return ABI_SHADOW_SPACE;
-}
-
-inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
- size_t rsp_alignment, size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
-
- if (subtraction != 0) {
- code.add(code.rsp, subtraction);
- }
-
- // GPRs need to be popped in reverse order
- for (int i = 15; i >= 0; i--) {
- if (regs[i]) {
code.pop(IndexToReg64(i));
}
}