diff options
Diffstat (limited to 'src/common/x64')
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 38 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.h | 13 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 103 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 48 | ||||
| -rw-r--r-- | src/common/x64/xbyak_abi.h | 95 | 
5 files changed, 231 insertions, 66 deletions
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..fccd2eee5 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -62,6 +62,17 @@ static CPUCaps Detect() {      std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));      std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));      std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); +    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) +        caps.manufacturer = Manufacturer::Intel; +    else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) +        caps.manufacturer = Manufacturer::AMD; +    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) +        caps.manufacturer = Manufacturer::Hygon; +    else +        caps.manufacturer = Manufacturer::Unknown; + +    u32 family = {}; +    u32 model = {};      __cpuid(cpu_id, 0x80000000); @@ -73,6 +84,14 @@ static CPUCaps Detect() {      // Detect family and other miscellaneous features      if (max_std_fn >= 1) {          __cpuid(cpu_id, 0x00000001); +        family = (cpu_id[0] >> 8) & 0xf; +        model = (cpu_id[0] >> 4) & 0xf; +        if (family == 0xf) { +            family += (cpu_id[0] >> 20) & 0xff; +        } +        if (family >= 6) { +            model += ((cpu_id[0] >> 16) & 0xf) << 4; +        }          if ((cpu_id[3] >> 25) & 1)              caps.sse = true; @@ -110,6 +129,11 @@ static CPUCaps Detect() {                  caps.bmi1 = true;              if ((cpu_id[1] >> 8) & 1)                  caps.bmi2 = true; +            // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) +            if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && +                (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { +                caps.avx512 = caps.avx2; +            }          }      } @@ -130,6 +154,20 @@ static 
CPUCaps Detect() {              caps.fma4 = true;      } +    if (max_ex_fn >= 0x80000007) { +        __cpuid(cpu_id, 0x80000007); +        if (cpu_id[3] & (1 << 8)) { +            caps.invariant_tsc = true; +        } +    } + +    if (max_std_fn >= 0x16) { +        __cpuid(cpu_id, 0x16); +        caps.base_frequency = cpu_id[0]; +        caps.max_frequency = cpu_id[1]; +        caps.bus_frequency = cpu_id[2]; +    } +      return caps;  } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..e3b63302e 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -6,8 +6,16 @@  namespace Common { +enum class Manufacturer : u32 { +    Intel = 0, +    AMD = 1, +    Hygon = 2, +    Unknown = 3, +}; +  /// x86/x64 CPU capabilities that may be detected by this module  struct CPUCaps { +    Manufacturer manufacturer;      char cpu_string[0x21];      char brand_string[0x41];      bool sse; @@ -19,11 +27,16 @@ struct CPUCaps {      bool lzcnt;      bool avx;      bool avx2; +    bool avx512;      bool bmi1;      bool bmi2;      bool fma;      bool fma4;      bool aes; +    bool invariant_tsc; +    u32 base_frequency; +    u32 max_frequency; +    u32 bus_frequency;  };  /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp new file mode 100644 index 000000000..424b39b1f --- /dev/null +++ b/src/common/x64/native_clock.cpp @@ -0,0 +1,103 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <chrono> +#include <mutex> +#include <thread> + +#ifdef _MSC_VER +#include <intrin.h> +#else +#include <x86intrin.h> +#endif + +#include "common/uint128.h" +#include "common/x64/native_clock.h" + +namespace Common { + +u64 EstimateRDTSCFrequency() { +    const auto milli_10 = std::chrono::milliseconds{10}; +    // get current time +    _mm_mfence(); +    const u64 tscStart = __rdtsc(); +    const auto startTime = std::chrono::high_resolution_clock::now(); +    // wait roughly 3 seconds +    while (true) { +        auto milli = std::chrono::duration_cast<std::chrono::milliseconds>( +            std::chrono::high_resolution_clock::now() - startTime); +        if (milli.count() >= 3000) +            break; +        std::this_thread::sleep_for(milli_10); +    } +    const auto endTime = std::chrono::high_resolution_clock::now(); +    _mm_mfence(); +    const u64 tscEnd = __rdtsc(); +    // calculate difference +    const u64 timer_diff = +        std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count(); +    const u64 tsc_diff = tscEnd - tscStart; +    const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); +    return tsc_freq; +} + +namespace X64 { +NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, +                         u64 rtsc_frequency) +    : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{ +                                                                             rtsc_frequency} { +    _mm_mfence(); +    last_measure = __rdtsc(); +    accumulated_ticks = 0U; +} + +u64 NativeClock::GetRTSC() { +    std::scoped_lock scope{rtsc_serialize}; +    _mm_mfence(); +    const u64 current_measure = __rdtsc(); +    u64 diff = current_measure - last_measure; +    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) +    if (current_measure > last_measure) { +        last_measure = current_measure; +    } +    
accumulated_ticks += diff; +    /// The clock cannot be more precise than the guest timer, remove the lower bits +    return accumulated_ticks & inaccuracy_mask; +} + +void NativeClock::Pause(bool is_paused) { +    if (!is_paused) { +        _mm_mfence(); +        last_measure = __rdtsc(); +    } +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() { +    const u64 rtsc_value = GetRTSC(); +    return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() { +    const u64 rtsc_value = GetRTSC(); +    return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() { +    const u64 rtsc_value = GetRTSC(); +    return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; +} + +u64 NativeClock::GetClockCycles() { +    const u64 rtsc_value = GetRTSC(); +    return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); +} + +u64 NativeClock::GetCPUCycles() { +    const u64 rtsc_value = GetRTSC(); +    return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); +} + +} // namespace X64 + +} // namespace Common diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h new file mode 100644 index 000000000..891a3bbfd --- /dev/null +++ b/src/common/x64/native_clock.h @@ -0,0 +1,48 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <optional> + +#include "common/spin_lock.h" +#include "common/wall_clock.h" + +namespace Common { + +namespace X64 { +class NativeClock : public WallClock { +public: +    NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency); + +    std::chrono::nanoseconds GetTimeNS() override; + +    std::chrono::microseconds GetTimeUS() override; + +    std::chrono::milliseconds GetTimeMS() override; + +    u64 GetClockCycles() override; + +    u64 GetCPUCycles() override; + +    void Pause(bool is_paused) override; + +private: +    u64 GetRTSC(); + +    /// value used to reduce the native clock's accuracy as some apps rely on +    /// undefined behavior where the level of accuracy in the clock shouldn't +    /// be higher. +    static constexpr u64 inaccuracy_mask = ~(0x400 - 1); + +    SpinLock rtsc_serialize{}; +    u64 last_measure{}; +    u64 accumulated_ticks{}; +    u64 rtsc_frequency; +}; +} // namespace X64 + +u64 EstimateRDTSCFrequency(); + +} // namespace Common diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,7 +11,7 @@  namespace Common::X64 { -inline int RegToIndex(const Xbyak::Reg& reg) { +inline std::size_t RegToIndex(const Xbyak::Reg& reg) {      using Kind = Xbyak::Reg::Kind;      ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,                 "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {      return reg.getIdx() + (reg.getKind() == Kind::REG ? 
0 : 16);  } -inline Xbyak::Reg64 IndexToReg64(int reg_index) { +inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {      ASSERT(reg_index < 16); -    return Xbyak::Reg64(reg_index); +    return Xbyak::Reg64(static_cast<int>(reg_index));  } -inline Xbyak::Xmm IndexToXmm(int reg_index) { +inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {      ASSERT(reg_index >= 16 && reg_index < 32); -    return Xbyak::Xmm(reg_index - 16); +    return Xbyak::Xmm(static_cast<int>(reg_index - 16));  } -inline Xbyak::Reg IndexToReg(int reg_index) { +inline Xbyak::Reg IndexToReg(std::size_t reg_index) {      if (reg_index < 16) {          return IndexToReg64(reg_index);      } else { @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;  #endif -inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, -                                   size_t needed_frame_size, s32* out_subtraction, -                                   s32* out_xmm_offset) { +struct ABIFrameInfo { +    s32 subtraction; +    s32 xmm_offset; +}; + +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, +                                           size_t needed_frame_size) {      const auto count = (regs & ABI_ALL_GPRS).count();      rsp_alignment -= count * 8;      size_t subtraction = 0; @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,      rsp_alignment -= subtraction;      subtraction += rsp_alignment & 0xF; -    *out_subtraction = (s32)subtraction; -    *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); +    return ABIFrameInfo{static_cast<s32>(subtraction), +                        static_cast<s32>(subtraction - xmm_base_subtraction)};  }  inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,                                                size_t rsp_alignment, size_t needed_frame_size = 0) { -    s32 subtraction, xmm_offset; -    
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); +    auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); +      for (std::size_t i = 0; i < regs.size(); ++i) {          if (regs[i] && ABI_ALL_GPRS[i]) { -            code.push(IndexToReg64(static_cast<int>(i))); +            code.push(IndexToReg64(i));          }      } -    if (subtraction != 0) { -        code.sub(code.rsp, subtraction); -    } -    for (int i = 0; i < regs.count(); i++) { -        if (regs.test(i) & ABI_ALL_GPRS.test(i)) { -            code.push(IndexToReg64(i)); -        } +    if (frame_info.subtraction != 0) { +        code.sub(code.rsp, frame_info.subtraction);      }      for (std::size_t i = 0; i < regs.size(); ++i) {          if (regs[i] && ABI_ALL_XMMS[i]) { -            code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); -            xmm_offset += 0x10; +            code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); +            frame_info.xmm_offset += 0x10;          }      } @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b  inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,                                             size_t rsp_alignment, size_t needed_frame_size = 0) { -    s32 subtraction, xmm_offset; -    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); +    auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);      for (std::size_t i = 0; i < regs.size(); ++i) {          if (regs[i] && ABI_ALL_XMMS[i]) { -            code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); -            xmm_offset += 0x10; +            code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); +            frame_info.xmm_offset += 0x10;          }      } -    if (subtraction 
!= 0) { -        code.add(code.rsp, subtraction); +    if (frame_info.subtraction != 0) { +        code.add(code.rsp, frame_info.subtraction);      }      // GPRs need to be popped in reverse order -    for (int i = 15; i >= 0; i--) { -        if (regs[i]) { -            code.pop(IndexToReg64(i)); -        } -    } -} - -inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, -                                                 size_t rsp_alignment, -                                                 size_t needed_frame_size = 0) { -    s32 subtraction, xmm_offset; -    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - -    for (std::size_t i = 0; i < regs.size(); ++i) { +    for (std::size_t j = 0; j < regs.size(); ++j) { +        const std::size_t i = regs.size() - j - 1;          if (regs[i] && ABI_ALL_GPRS[i]) { -            code.push(IndexToReg64(static_cast<int>(i))); -        } -    } - -    if (subtraction != 0) { -        code.sub(code.rsp, subtraction); -    } - -    return ABI_SHADOW_SPACE; -} - -inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, -                                              size_t rsp_alignment, size_t needed_frame_size = 0) { -    s32 subtraction, xmm_offset; -    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - -    if (subtraction != 0) { -        code.add(code.rsp, subtraction); -    } - -    // GPRs need to be popped in reverse order -    for (int i = 15; i >= 0; i--) { -        if (regs[i]) {              code.pop(IndexToReg64(i));          }      }  | 
