summaryrefslogtreecommitdiff
path: root/src/common/x64
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/x64')
-rw-r--r--src/common/x64/cpu_detect.cpp38
-rw-r--r--src/common/x64/cpu_detect.h13
-rw-r--r--src/common/x64/native_clock.cpp103
-rw-r--r--src/common/x64/native_clock.h48
-rw-r--r--src/common/x64/xbyak_abi.h229
-rw-r--r--src/common/x64/xbyak_util.h47
6 files changed, 478 insertions, 0 deletions
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..fccd2eee5 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
+ if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
+ caps.manufacturer = Manufacturer::Intel;
+ else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
+ caps.manufacturer = Manufacturer::AMD;
+ else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
+ caps.manufacturer = Manufacturer::Hygon;
+ else
+ caps.manufacturer = Manufacturer::Unknown;
+
+ u32 family = {};
+ u32 model = {};
__cpuid(cpu_id, 0x80000000);
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
+ family = (cpu_id[0] >> 8) & 0xf;
+ model = (cpu_id[0] >> 4) & 0xf;
+ if (family == 0xf) {
+ family += (cpu_id[0] >> 20) & 0xff;
+ }
+ if (family >= 6) {
+ model += ((cpu_id[0] >> 16) & 0xf) << 4;
+ }
if ((cpu_id[3] >> 25) & 1)
caps.sse = true;
@@ -110,6 +129,11 @@ static CPUCaps Detect() {
caps.bmi1 = true;
if ((cpu_id[1] >> 8) & 1)
caps.bmi2 = true;
+ // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
+ if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
+ (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
+ caps.avx512 = caps.avx2;
+ }
}
}
@@ -130,6 +154,20 @@ static CPUCaps Detect() {
caps.fma4 = true;
}
+ if (max_ex_fn >= 0x80000007) {
+ __cpuid(cpu_id, 0x80000007);
+ if (cpu_id[3] & (1 << 8)) {
+ caps.invariant_tsc = true;
+ }
+ }
+
+ if (max_std_fn >= 0x16) {
+ __cpuid(cpu_id, 0x16);
+ caps.base_frequency = cpu_id[0];
+ caps.max_frequency = cpu_id[1];
+ caps.bus_frequency = cpu_id[2];
+ }
+
return caps;
}
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..e3b63302e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
namespace Common {
+enum class Manufacturer : u32 {
+ Intel = 0,
+ AMD = 1,
+ Hygon = 2,
+ Unknown = 3,
+};
+
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
+ Manufacturer manufacturer;
char cpu_string[0x21];
char brand_string[0x41];
bool sse;
@@ -19,11 +27,16 @@ struct CPUCaps {
bool lzcnt;
bool avx;
bool avx2;
+ bool avx512;
bool bmi1;
bool bmi2;
bool fma;
bool fma4;
bool aes;
+ bool invariant_tsc;
+ u32 base_frequency;
+ u32 max_frequency;
+ u32 bus_frequency;
};
/**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..424b39b1f
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,103 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <mutex>
+#include <thread>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+#include "common/uint128.h"
+#include "common/x64/native_clock.h"
+
+namespace Common {
+
+u64 EstimateRDTSCFrequency() {
+ const auto milli_10 = std::chrono::milliseconds{10};
+ // get current time
+ _mm_mfence();
+ const u64 tscStart = __rdtsc();
+ const auto startTime = std::chrono::high_resolution_clock::now();
+ // wait roughly 3 seconds
+ while (true) {
+ auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::high_resolution_clock::now() - startTime);
+ if (milli.count() >= 3000)
+ break;
+ std::this_thread::sleep_for(milli_10);
+ }
+ const auto endTime = std::chrono::high_resolution_clock::now();
+ _mm_mfence();
+ const u64 tscEnd = __rdtsc();
+ // calculate difference
+ const u64 timer_diff =
+ std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
+ const u64 tsc_diff = tscEnd - tscStart;
+ const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
+ return tsc_freq;
+}
+
+namespace X64 {
+NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
+ u64 rtsc_frequency)
+ : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
+ rtsc_frequency} {
+ _mm_mfence();
+ last_measure = __rdtsc();
+ accumulated_ticks = 0U;
+}
+
+u64 NativeClock::GetRTSC() {
+ std::scoped_lock scope{rtsc_serialize};
+ _mm_mfence();
+ const u64 current_measure = __rdtsc();
+ u64 diff = current_measure - last_measure;
+ diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
+ if (current_measure > last_measure) {
+ last_measure = current_measure;
+ }
+ accumulated_ticks += diff;
+ /// The clock cannot be more precise than the guest timer, remove the lower bits
+ return accumulated_ticks & inaccuracy_mask;
+}
+
+void NativeClock::Pause(bool is_paused) {
+ if (!is_paused) {
+ _mm_mfence();
+ last_measure = __rdtsc();
+ }
+}
+
+std::chrono::nanoseconds NativeClock::GetTimeNS() {
+ const u64 rtsc_value = GetRTSC();
+ return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
+}
+
+std::chrono::microseconds NativeClock::GetTimeUS() {
+ const u64 rtsc_value = GetRTSC();
+ return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
+}
+
+std::chrono::milliseconds NativeClock::GetTimeMS() {
+ const u64 rtsc_value = GetRTSC();
+ return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
+}
+
+u64 NativeClock::GetClockCycles() {
+ const u64 rtsc_value = GetRTSC();
+ return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+}
+
+u64 NativeClock::GetCPUCycles() {
+ const u64 rtsc_value = GetRTSC();
+ return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+}
+
+} // namespace X64
+
+} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..97aab6ac9
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/spin_lock.h"
+#include "common/wall_clock.h"
+
+namespace Common {
+
+namespace X64 {
+class NativeClock final : public WallClock {
+public:
+ NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
+
+ std::chrono::nanoseconds GetTimeNS() override;
+
+ std::chrono::microseconds GetTimeUS() override;
+
+ std::chrono::milliseconds GetTimeMS() override;
+
+ u64 GetClockCycles() override;
+
+ u64 GetCPUCycles() override;
+
+ void Pause(bool is_paused) override;
+
+private:
+ u64 GetRTSC();
+
+ /// value used to reduce the native clocks accuracy as some apss rely on
+ /// undefined behavior where the level of accuracy in the clock shouldn't
+ /// be higher.
+ static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
+
+ SpinLock rtsc_serialize{};
+ u64 last_measure{};
+ u64 accumulated_ticks{};
+ u64 rtsc_frequency;
+};
+} // namespace X64
+
+u64 EstimateRDTSCFrequency();
+
+} // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..26e4bfda5
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,229 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <initializer_list>
+#include <xbyak.h>
+#include "common/assert.h"
+
+namespace Common::X64 {
+
+constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) {
+ using Kind = Xbyak::Reg::Kind;
+ ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
+ "RegSet only support GPRs and XMM registers.");
+ ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
+ return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
+}
+
+constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
+ ASSERT(reg_index < 16);
+ return Xbyak::Reg64(static_cast<int>(reg_index));
+}
+
+constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
+ ASSERT(reg_index >= 16 && reg_index < 32);
+ return Xbyak::Xmm(static_cast<int>(reg_index - 16));
+}
+
+constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) {
+ if (reg_index < 16) {
+ return IndexToReg64(reg_index);
+ } else {
+ return IndexToXmm(reg_index);
+ }
+}
+
+inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
+ std::bitset<32> bits;
+ for (const Xbyak::Reg& reg : regs) {
+ bits[RegToIndex(reg)] = true;
+ }
+ return bits;
+}
+
+constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
+constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
+
+#ifdef _WIN32
+
+// Microsoft x64 ABI
+constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
+constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
+constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
+constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
+
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rcx,
+ Xbyak::util::rdx,
+ Xbyak::util::r8,
+ Xbyak::util::r9,
+ Xbyak::util::r10,
+ Xbyak::util::r11,
+ // XMMs
+ Xbyak::util::xmm0,
+ Xbyak::util::xmm1,
+ Xbyak::util::xmm2,
+ Xbyak::util::xmm3,
+ Xbyak::util::xmm4,
+ Xbyak::util::xmm5,
+});
+
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rbx,
+ Xbyak::util::rsi,
+ Xbyak::util::rdi,
+ Xbyak::util::rbp,
+ Xbyak::util::r12,
+ Xbyak::util::r13,
+ Xbyak::util::r14,
+ Xbyak::util::r15,
+ // XMMs
+ Xbyak::util::xmm6,
+ Xbyak::util::xmm7,
+ Xbyak::util::xmm8,
+ Xbyak::util::xmm9,
+ Xbyak::util::xmm10,
+ Xbyak::util::xmm11,
+ Xbyak::util::xmm12,
+ Xbyak::util::xmm13,
+ Xbyak::util::xmm14,
+ Xbyak::util::xmm15,
+});
+
+constexpr size_t ABI_SHADOW_SPACE = 0x20;
+
+#else
+
+// System V x86-64 ABI
+constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
+constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
+constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
+constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
+
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rcx,
+ Xbyak::util::rdx,
+ Xbyak::util::rdi,
+ Xbyak::util::rsi,
+ Xbyak::util::r8,
+ Xbyak::util::r9,
+ Xbyak::util::r10,
+ Xbyak::util::r11,
+ // XMMs
+ Xbyak::util::xmm0,
+ Xbyak::util::xmm1,
+ Xbyak::util::xmm2,
+ Xbyak::util::xmm3,
+ Xbyak::util::xmm4,
+ Xbyak::util::xmm5,
+ Xbyak::util::xmm6,
+ Xbyak::util::xmm7,
+ Xbyak::util::xmm8,
+ Xbyak::util::xmm9,
+ Xbyak::util::xmm10,
+ Xbyak::util::xmm11,
+ Xbyak::util::xmm12,
+ Xbyak::util::xmm13,
+ Xbyak::util::xmm14,
+ Xbyak::util::xmm15,
+});
+
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rbx,
+ Xbyak::util::rbp,
+ Xbyak::util::r12,
+ Xbyak::util::r13,
+ Xbyak::util::r14,
+ Xbyak::util::r15,
+});
+
+constexpr size_t ABI_SHADOW_SPACE = 0;
+
+#endif
+
+struct ABIFrameInfo {
+ s32 subtraction;
+ s32 xmm_offset;
+};
+
+inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+ size_t needed_frame_size) {
+ const auto count = (regs & ABI_ALL_GPRS).count();
+ rsp_alignment -= count * 8;
+ size_t subtraction = 0;
+ const auto xmm_count = (regs & ABI_ALL_XMMS).count();
+ if (xmm_count) {
+ // If we have any XMMs to save, we must align the stack here.
+ subtraction = rsp_alignment & 0xF;
+ }
+ subtraction += 0x10 * xmm_count;
+ size_t xmm_base_subtraction = subtraction;
+ subtraction += needed_frame_size;
+ subtraction += ABI_SHADOW_SPACE;
+ // Final alignment.
+ rsp_alignment -= subtraction;
+ subtraction += rsp_alignment & 0xF;
+
+ return ABIFrameInfo{static_cast<s32>(subtraction),
+ static_cast<s32>(subtraction - xmm_base_subtraction)};
+}
+
+inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_GPRS[i]) {
+ code.push(IndexToReg64(i));
+ }
+ }
+
+ if (frame_info.subtraction != 0) {
+ code.sub(code.rsp, frame_info.subtraction);
+ }
+
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_XMMS[i]) {
+ code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
+ frame_info.xmm_offset += 0x10;
+ }
+ }
+
+ return ABI_SHADOW_SPACE;
+}
+
+inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_XMMS[i]) {
+ code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
+ frame_info.xmm_offset += 0x10;
+ }
+ }
+
+ if (frame_info.subtraction != 0) {
+ code.add(code.rsp, frame_info.subtraction);
+ }
+
+ // GPRs need to be popped in reverse order
+ for (std::size_t j = 0; j < regs.size(); ++j) {
+ const std::size_t i = regs.size() - j - 1;
+ if (regs[i] && ABI_ALL_GPRS[i]) {
+ code.pop(IndexToReg64(i));
+ }
+ }
+}
+
+} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include <xbyak.h>
+#include "common/x64/xbyak_abi.h"
+
+namespace Common::X64 {
+
+// Constants for use with cmpps/cmpss
+enum {
+ CMP_EQ = 0,
+ CMP_LT = 1,
+ CMP_LE = 2,
+ CMP_UNORD = 3,
+ CMP_NEQ = 4,
+ CMP_NLT = 5,
+ CMP_NLE = 6,
+ CMP_ORD = 7,
+};
+
+constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
+ const u64 distance = target - (ref + 5);
+ return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
+}
+
+inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
+ return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
+}
+
+template <typename T>
+inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
+ static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
+ size_t addr = reinterpret_cast<size_t>(f);
+ if (IsWithin2G(code, addr)) {
+ code.call(f);
+ } else {
+ // ABI_RETURN is a safe temp register to use before a call
+ code.mov(ABI_RETURN, addr);
+ code.call(ABI_RETURN);
+ }
+}
+
+} // namespace Common::X64