diff options
Diffstat (limited to 'src/common/x64')
-rw-r--r-- | src/common/x64/cpu_detect.cpp | 38 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.h | 13 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 95 | ||||
-rw-r--r-- | src/common/x64/native_clock.h | 41 | ||||
-rw-r--r-- | src/common/x64/xbyak_abi.h | 95 |
5 files changed, 216 insertions, 66 deletions
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..fccd2eee5 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -62,6 +62,17 @@ static CPUCaps Detect() { std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); + if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) + caps.manufacturer = Manufacturer::Intel; + else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) + caps.manufacturer = Manufacturer::AMD; + else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) + caps.manufacturer = Manufacturer::Hygon; + else + caps.manufacturer = Manufacturer::Unknown; + + u32 family = {}; + u32 model = {}; __cpuid(cpu_id, 0x80000000); @@ -73,6 +84,14 @@ static CPUCaps Detect() { // Detect family and other miscellaneous features if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); + family = (cpu_id[0] >> 8) & 0xf; + model = (cpu_id[0] >> 4) & 0xf; + if (family == 0xf) { + family += (cpu_id[0] >> 20) & 0xff; + } + if (family >= 6) { + model += ((cpu_id[0] >> 16) & 0xf) << 4; + } if ((cpu_id[3] >> 25) & 1) caps.sse = true; @@ -110,6 +129,11 @@ static CPUCaps Detect() { caps.bmi1 = true; if ((cpu_id[1] >> 8) & 1) caps.bmi2 = true; + // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) + if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && + (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { + caps.avx512 = caps.avx2; + } } } @@ -130,6 +154,20 @@ static CPUCaps Detect() { caps.fma4 = true; } + if (max_ex_fn >= 0x80000007) { + __cpuid(cpu_id, 0x80000007); + if (cpu_id[3] & (1 << 8)) { + caps.invariant_tsc = true; + } + } + + if (max_std_fn >= 0x16) { + __cpuid(cpu_id, 0x16); + caps.base_frequency = cpu_id[0]; + caps.max_frequency = cpu_id[1]; + caps.bus_frequency = cpu_id[2]; + } + return caps; } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..e3b63302e 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -6,8 +6,16 @@ namespace Common { +enum class Manufacturer : u32 { + Intel = 0, + AMD = 1, + Hygon = 2, + Unknown = 3, +}; + /// x86/x64 CPU capabilities that may be detected by this module struct CPUCaps { + Manufacturer manufacturer; char cpu_string[0x21]; char brand_string[0x41]; bool sse; @@ -19,11 +27,16 @@ struct CPUCaps { bool lzcnt; bool avx; bool avx2; + bool avx512; bool bmi1; bool bmi2; bool fma; bool fma4; bool aes; + bool invariant_tsc; + u32 base_frequency; + u32 max_frequency; + u32 bus_frequency; }; /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp new file mode 100644 index 000000000..26d4d0ba6 --- /dev/null +++ b/src/common/x64/native_clock.cpp @@ -0,0 +1,95 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <thread> + +#ifdef _MSC_VER +#include <intrin.h> +#else +#include <x86intrin.h> +#endif + +#include "common/uint128.h" +#include "common/x64/native_clock.h" + +namespace Common { + +u64 EstimateRDTSCFrequency() { + const auto milli_10 = std::chrono::milliseconds{10}; + // get current time + _mm_mfence(); + const u64 tscStart = __rdtsc(); + const auto startTime = std::chrono::high_resolution_clock::now(); + // wait roughly 3 seconds + while (true) { + auto milli = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::high_resolution_clock::now() - startTime); + if (milli.count() >= 3000) + break; + std::this_thread::sleep_for(milli_10); + } + const auto endTime = std::chrono::high_resolution_clock::now(); + _mm_mfence(); + const u64 tscEnd = __rdtsc(); + // calculate difference + const u64 timer_diff = + std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count(); + const u64 tsc_diff = tscEnd - tscStart; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); + return tsc_freq; +} + +namespace X64 { +NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, + u64 rtsc_frequency) + : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{ + rtsc_frequency} { + _mm_mfence(); + last_measure = __rdtsc(); + accumulated_ticks = 0U; +} + +u64 NativeClock::GetRTSC() { + rtsc_serialize.lock(); + _mm_mfence(); + const u64 current_measure = __rdtsc(); + u64 diff = current_measure - last_measure; + diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) + if (current_measure > last_measure) { + last_measure = current_measure; + } + accumulated_ticks += diff; + rtsc_serialize.unlock(); + return accumulated_ticks; +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; +} + +u64 NativeClock::GetClockCycles() { + const u64 rtsc_value = GetRTSC(); + return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); +} + +u64 NativeClock::GetCPUCycles() { + const u64 rtsc_value = GetRTSC(); + return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); +} + +} // namespace X64 + +} // namespace Common diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h new file mode 100644 index 000000000..b58cf9f5a --- /dev/null +++ b/src/common/x64/native_clock.h @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> + +#include "common/spin_lock.h" +#include "common/wall_clock.h" + +namespace Common { + +namespace X64 { +class NativeClock : public WallClock { +public: + NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency); + + std::chrono::nanoseconds GetTimeNS() override; + + std::chrono::microseconds GetTimeUS() override; + + std::chrono::milliseconds GetTimeMS() override; + + u64 GetClockCycles() override; + + u64 GetCPUCycles() override; + +private: + u64 GetRTSC(); + + SpinLock rtsc_serialize{}; + u64 last_measure{}; + u64 accumulated_ticks{}; + u64 rtsc_frequency; +}; +} // namespace X64 + +u64 EstimateRDTSCFrequency(); + +} // namespace Common diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,7 +11,7 @@ namespace Common::X64 { -inline int RegToIndex(const Xbyak::Reg& reg) { +inline std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); } -inline Xbyak::Reg64 IndexToReg64(int reg_index) { +inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); - return Xbyak::Reg64(reg_index); + return Xbyak::Reg64(static_cast<int>(reg_index)); } -inline Xbyak::Xmm IndexToXmm(int reg_index) { +inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); - return Xbyak::Xmm(reg_index - 16); + return Xbyak::Xmm(static_cast<int>(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(int reg_index) { +inline Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; #endif -inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, - size_t needed_frame_size, s32* out_subtraction, - s32* out_xmm_offset) { +struct ABIFrameInfo { + s32 subtraction; + s32 xmm_offset; +}; + +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, + size_t needed_frame_size) { const auto count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; size_t subtraction = 0; @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, rsp_alignment -= subtraction; subtraction += rsp_alignment & 0xF; - *out_subtraction = (s32)subtraction; - *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); + return ABIFrameInfo{static_cast<s32>(subtraction), + static_cast<s32>(subtraction - xmm_base_subtraction)}; } inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); + for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); + code.push(IndexToReg64(i)); } } - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - for (int i = 0; i < regs.count(); i++) { - if (regs.test(i) & ABI_ALL_GPRS.test(i)) { - code.push(IndexToReg64(i)); - } + if (frame_info.subtraction != 0) { + code.sub(code.rsp, frame_info.subtraction); } for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); - xmm_offset += 0x10; + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); + frame_info.xmm_offset += 0x10; } } @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); - xmm_offset += 0x10; + code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; } } - if (subtraction != 0) { - code.add(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.add(code.rsp, frame_info.subtraction); } // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { - code.pop(IndexToReg64(i)); - } - } -} - -inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, - size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - for (std::size_t i = 0; i < regs.size(); ++i) { + for (std::size_t j = 0; j < regs.size(); ++j) { + const std::size_t i = regs.size() - j - 1; if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); - } - } - - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - - return ABI_SHADOW_SPACE; -} - -inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - if (subtraction != 0) { - code.add(code.rsp, subtraction); - } - - // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { code.pop(IndexToReg64(i)); } } |