diff options
Diffstat (limited to 'src/common/x64')
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 138 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.h | 85 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 69 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 13 | ||||
| -rw-r--r-- | src/common/x64/xbyak_abi.h | 5 | ||||
| -rw-r--r-- | src/common/x64/xbyak_util.h | 5 | 
6 files changed, 191 insertions, 124 deletions
| diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index fbeacc7e2..1a27532d4 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -1,8 +1,12 @@ -// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#include <array>  #include <cstring> +#include <iterator> +#include <string_view> +#include "common/bit_util.h"  #include "common/common_types.h"  #include "common/x64/cpu_detect.h" @@ -17,7 +21,7 @@  // clang-format on  #endif -static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { +static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {  #if defined(__DragonFly__) || defined(__FreeBSD__)      // Despite the name, this is just do_cpuid() with ECX as second input.      cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); @@ -30,7 +34,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {  #endif  } -static inline void __cpuid(int info[4], int function_id) { +static inline void __cpuid(int info[4], u32 function_id) {      return __cpuidex(info, function_id, 0);  } @@ -45,6 +49,17 @@ static inline u64 _xgetbv(u32 index) {  namespace Common { +CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) { +    if (brand_string == "GenuineIntel") { +        return Manufacturer::Intel; +    } else if (brand_string == "AuthenticAMD") { +        return Manufacturer::AMD; +    } else if (brand_string == "HygonGenuine") { +        return Manufacturer::Hygon; +    } +    return Manufacturer::Unknown; +} +  // Detects the various CPU features  static CPUCaps Detect() {      CPUCaps caps = {}; @@ -53,75 +68,74 @@ static CPUCaps Detect() {      // yuzu at all anyway      int cpu_id[4]; -    memset(caps.brand_string, 0, sizeof(caps.brand_string)); -    // Detect CPU's CPUID capabilities and grab CPU string +    // Detect CPU's CPUID capabilities and grab manufacturer string      __cpuid(cpu_id, 0x00000000); -    u32 max_std_fn = cpu_id[0]; // EAX - -    std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); -    std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); -    std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); -    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) -        caps.manufacturer = Manufacturer::Intel; -    else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) -        caps.manufacturer = Manufacturer::AMD; -    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) -        caps.manufacturer = Manufacturer::Hygon; -    else -        caps.manufacturer = Manufacturer::Unknown; +    const u32 max_std_fn = cpu_id[0]; // EAX -    __cpuid(cpu_id, 0x80000000); +    std::memset(caps.brand_string, 0, std::size(caps.brand_string)); +    std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32)); +    std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32)); +    std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32)); + +    caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string); + +    // Set reasonable default cpu string even if brand string not available +    std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string)); -    u32 max_ex_fn = cpu_id[0]; +    __cpuid(cpu_id, 0x80000000); -    // Set reasonable default brand string even if brand string not available -    strcpy(caps.cpu_string, caps.brand_string); +    const u32 max_ex_fn = cpu_id[0];      // Detect family and other miscellaneous features      if (max_std_fn >= 1) {          __cpuid(cpu_id, 0x00000001); -        if ((cpu_id[3] >> 25) & 1) -            caps.sse = true; -        if ((cpu_id[3] >> 26) & 1) -            caps.sse2 = true; -        if ((cpu_id[2]) & 1) -            caps.sse3 = true; -        if ((cpu_id[2] >> 9) & 1) -            caps.ssse3 = true; -        if ((cpu_id[2] >> 19) & 1) -            caps.sse4_1 = true; -        if ((cpu_id[2] >> 20) & 1) -            caps.sse4_2 = true; -        if ((cpu_id[2] >> 25) & 1) -            caps.aes = true; +        caps.sse = Common::Bit<25>(cpu_id[3]); +        caps.sse2 = Common::Bit<26>(cpu_id[3]); +        caps.sse3 = Common::Bit<0>(cpu_id[2]); +        caps.pclmulqdq = Common::Bit<1>(cpu_id[2]); +        caps.ssse3 = Common::Bit<9>(cpu_id[2]); +        caps.sse4_1 = Common::Bit<19>(cpu_id[2]); +        caps.sse4_2 = Common::Bit<20>(cpu_id[2]); +        caps.movbe = Common::Bit<22>(cpu_id[2]); +        caps.popcnt = Common::Bit<23>(cpu_id[2]); +        caps.aes = Common::Bit<25>(cpu_id[2]); +        caps.f16c = Common::Bit<29>(cpu_id[2]);          // AVX support requires 3 separate checks:          //  - Is the AVX bit set in CPUID?          //  - Is the XSAVE bit set in CPUID?          //  - XGETBV result has the XCR bit set. -        if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { +        if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) {              if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {                  caps.avx = true; -                if ((cpu_id[2] >> 12) & 1) +                if (Common::Bit<12>(cpu_id[2]))                      caps.fma = true;              }          }          if (max_std_fn >= 7) {              __cpuidex(cpu_id, 0x00000007, 0x00000000); -            // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed -            if ((cpu_id[1] >> 5) & 1) -                caps.avx2 = caps.avx; -            if ((cpu_id[1] >> 3) & 1) -                caps.bmi1 = true; -            if ((cpu_id[1] >> 8) & 1) -                caps.bmi2 = true; -            // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) -            if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && -                (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { -                caps.avx512 = caps.avx2; +            // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed +            if (caps.avx) { +                caps.avx2 = Common::Bit<5>(cpu_id[1]); +                caps.avx512f = Common::Bit<16>(cpu_id[1]); +                caps.avx512dq = Common::Bit<17>(cpu_id[1]); +                caps.avx512cd = Common::Bit<28>(cpu_id[1]); +                caps.avx512bw = Common::Bit<30>(cpu_id[1]); +                caps.avx512vl = Common::Bit<31>(cpu_id[1]); +                caps.avx512vbmi = Common::Bit<1>(cpu_id[2]); +                caps.avx512bitalg = Common::Bit<12>(cpu_id[2]);              } + +            caps.bmi1 = Common::Bit<3>(cpu_id[1]); +            caps.bmi2 = Common::Bit<8>(cpu_id[1]); +            caps.sha = Common::Bit<29>(cpu_id[1]); + +            caps.gfni = Common::Bit<8>(cpu_id[2]); + +            __cpuidex(cpu_id, 0x00000007, 0x00000001); +            caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]);          }      } @@ -138,14 +152,28 @@ static CPUCaps Detect() {      if (max_ex_fn >= 0x80000001) {          // Check for more features          __cpuid(cpu_id, 0x80000001); -        if ((cpu_id[2] >> 16) & 1) -            caps.fma4 = true; +        caps.lzcnt = Common::Bit<5>(cpu_id[2]); +        caps.fma4 = Common::Bit<16>(cpu_id[2]);      }      if (max_ex_fn >= 0x80000007) {          __cpuid(cpu_id, 0x80000007); -        if (cpu_id[3] & (1 << 8)) { -            caps.invariant_tsc = true; +        caps.invariant_tsc = Common::Bit<8>(cpu_id[3]); +    } + +    if (max_std_fn >= 0x15) { +        __cpuid(cpu_id, 0x15); +        caps.tsc_crystal_ratio_denominator = cpu_id[0]; +        caps.tsc_crystal_ratio_numerator = cpu_id[1]; +        caps.crystal_frequency = cpu_id[2]; +        // Some CPU models might not return a crystal frequency. +        // The CPU model can be detected to use the values from turbostat +        // https://github.com/torvalds/linux/blob/master/tools/power/x86/turbostat/turbostat.c#L5569 +        // but it's easier to just estimate the TSC tick rate for these cases. +        if (caps.tsc_crystal_ratio_denominator) { +            caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * +                                 caps.tsc_crystal_ratio_numerator / +                                 caps.tsc_crystal_ratio_denominator;          }      } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index e3b63302e..6830f3795 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -1,42 +1,71 @@ -// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later  #pragma once -namespace Common { +#include <string_view> +#include "common/common_types.h" -enum class Manufacturer : u32 { -    Intel = 0, -    AMD = 1, -    Hygon = 2, -    Unknown = 3, -}; +namespace Common {  /// x86/x64 CPU capabilities that may be detected by this module  struct CPUCaps { + +    enum class Manufacturer : u8 { +        Unknown = 0, +        Intel = 1, +        AMD = 2, +        Hygon = 3, +    }; + +    static Manufacturer ParseManufacturer(std::string_view brand_string); +      Manufacturer manufacturer; -    char cpu_string[0x21]; -    char brand_string[0x41]; -    bool sse; -    bool sse2; -    bool sse3; -    bool ssse3; -    bool sse4_1; -    bool sse4_2; -    bool lzcnt; -    bool avx; -    bool avx2; -    bool avx512; -    bool bmi1; -    bool bmi2; -    bool fma; -    bool fma4; -    bool aes; -    bool invariant_tsc; +    char brand_string[13]; + +    char cpu_string[48]; +      u32 base_frequency;      u32 max_frequency;      u32 bus_frequency; + +    u32 tsc_crystal_ratio_denominator; +    u32 tsc_crystal_ratio_numerator; +    u32 crystal_frequency; +    u64 tsc_frequency; // Derived from the above three values + +    bool sse : 1; +    bool sse2 : 1; +    bool sse3 : 1; +    bool ssse3 : 1; +    bool sse4_1 : 1; +    bool sse4_2 : 1; + +    bool avx : 1; +    bool avx_vnni : 1; +    bool avx2 : 1; +    bool avx512f : 1; +    bool avx512dq : 1; +    bool avx512cd : 1; +    bool avx512bw : 1; +    bool avx512vl : 1; +    bool avx512vbmi : 1; +    bool avx512bitalg : 1; + +    bool aes : 1; +    bool bmi1 : 1; +    bool bmi2 : 1; +    bool f16c : 1; +    bool fma : 1; +    bool fma4 : 1; +    bool gfni : 1; +    bool invariant_tsc : 1; +    bool lzcnt : 1; +    bool movbe : 1; +    bool pclmulqdq : 1; +    bool popcnt : 1; +    bool sha : 1;  };  /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 91b842829..8b08332ab 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,36 +1,57 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later  #include <array>  #include <chrono> -#include <limits> -#include <mutex>  #include <thread>  #include "common/atomic_ops.h"  #include "common/uint128.h"  #include "common/x64/native_clock.h" +#ifdef _MSC_VER +#include <intrin.h> +#endif +  namespace Common { +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { +    _mm_lfence(); +    _ReadWriteBarrier(); +    const u64 result = __rdtsc(); +    _mm_lfence(); +    _ReadWriteBarrier(); +    return result; +} +#else +static u64 FencedRDTSC() { +    u64 result; +    asm volatile("lfence\n\t" +                 "rdtsc\n\t" +                 "shl $32, %%rdx\n\t" +                 "or %%rdx, %0\n\t" +                 "lfence" +                 : "=a"(result) +                 : +                 : "rdx", "memory", "cc"); +    return result; +} +#endif +  u64 EstimateRDTSCFrequency() {      // Discard the first result measuring the rdtsc. -    _mm_mfence(); -    __rdtsc(); +    FencedRDTSC();      std::this_thread::sleep_for(std::chrono::milliseconds{1}); -    _mm_mfence(); -    __rdtsc(); +    FencedRDTSC();      // Get the current time.      const auto start_time = std::chrono::steady_clock::now(); -    _mm_mfence(); -    const u64 tsc_start = __rdtsc(); +    const u64 tsc_start = FencedRDTSC();      // Wait for 200 milliseconds.      std::this_thread::sleep_for(std::chrono::milliseconds{200});      const auto end_time = std::chrono::steady_clock::now(); -    _mm_mfence(); -    const u64 tsc_end = __rdtsc(); +    const u64 tsc_end = FencedRDTSC();      // Calculate differences.      const u64 timer_diff = static_cast<u64>(          std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); @@ -44,8 +65,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen                           u64 rtsc_frequency_)      : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{                                                                                 rtsc_frequency_} { -    _mm_mfence(); -    time_point.inner.last_measure = __rdtsc(); +    time_point.inner.last_measure = FencedRDTSC();      time_point.inner.accumulated_ticks = 0U;      ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);      us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); @@ -57,10 +77,10 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen  u64 NativeClock::GetRTSC() {      TimePoint new_time_point{};      TimePoint current_time_point{}; + +    current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());      do { -        current_time_point.pack = time_point.pack; -        _mm_mfence(); -        const u64 current_measure = __rdtsc(); +        const u64 current_measure = FencedRDTSC();          u64 diff = current_measure - current_time_point.inner.last_measure;          diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)          new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure @@ -68,22 +88,21 @@ u64 NativeClock::GetRTSC() {                                                  : current_time_point.inner.last_measure;          new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;      } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, -                                           current_time_point.pack)); -    /// The clock cannot be more precise than the guest timer, remove the lower bits -    return new_time_point.inner.accumulated_ticks & inaccuracy_mask; +                                           current_time_point.pack, current_time_point.pack)); +    return new_time_point.inner.accumulated_ticks;  }  void NativeClock::Pause(bool is_paused) {      if (!is_paused) {          TimePoint current_time_point{};          TimePoint new_time_point{}; + +        current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());          do { -            current_time_point.pack = time_point.pack;              new_time_point.pack = current_time_point.pack; -            _mm_mfence(); -            new_time_point.inner.last_measure = __rdtsc(); +            new_time_point.inner.last_measure = FencedRDTSC();          } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, -                                               current_time_point.pack)); +                                               current_time_point.pack, current_time_point.pack));      }  } diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 7cbd400d2..38ae7a462 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -1,11 +1,8 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later  #pragma once -#include <optional> -  #include "common/wall_clock.h"  namespace Common { @@ -40,12 +37,8 @@ private:          } inner;      }; -    /// value used to reduce the native clocks accuracy as some apss rely on -    /// undefined behavior where the level of accuracy in the clock shouldn't -    /// be higher. -    static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); -      TimePoint time_point; +      // factors      u64 clock_rtsc_factor{};      u64 cpu_rtsc_factor{}; diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 87b3d63a4..67e6e63c8 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -1,6 +1,5 @@ -// Copyright 2016 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2016 Citra Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later  #pragma once diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h index 44d2558f1..250e5cddb 100644 --- a/src/common/x64/xbyak_util.h +++ b/src/common/x64/xbyak_util.h @@ -1,6 +1,5 @@ -// Copyright 2016 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2016 Citra Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later  #pragma once | 
