diff options
Diffstat (limited to 'src/common/x64')
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 3 | ||||
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 20 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 166 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 59 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.cpp | 39 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.h | 37 | 
6 files changed, 126 insertions, 198 deletions
| diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -14,6 +14,7 @@  #include "common/common_types.h"  #include "common/logging/log.h"  #include "common/x64/cpu_detect.h" +#include "common/x64/rdtsc.h"  #ifdef _WIN32  #include <windows.h> @@ -187,6 +188,8 @@ static CPUCaps Detect() {              caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *                                   caps.tsc_crystal_ratio_numerator /                                   caps.tsc_crystal_ratio_denominator; +        } else { +            caps.tsc_frequency = X64::EstimateRDTSCFrequency();          }      } diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,19 +9,11 @@  #include "common/x64/cpu_detect.h"  #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h"  namespace Common::X64 {  #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { -    _mm_lfence(); -    _ReadWriteBarrier(); -    const u64 result = __rdtsc(); -    _mm_lfence(); -    _ReadWriteBarrier(); -    return result; -} -  __forceinline static void TPAUSE() {      // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.      // For reference: @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() {      _tpause(0, FencedRDTSC() + PauseCycles);  }  #else -static u64 FencedRDTSC() { -    u64 eax; -    u64 edx; -    asm volatile("lfence\n\t" -                 "rdtsc\n\t" -                 "lfence\n\t" -                 : "=a"(eax), "=d"(edx)); -    return (edx << 32) | eax; -} -  static void TPAUSE() {      // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.      // For reference: diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,164 +1,50 @@  // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project  // SPDX-License-Identifier: GPL-2.0-or-later -#include <array> -#include <chrono> -#include <thread> - -#include "common/atomic_ops.h" -#include "common/steady_clock.h"  #include "common/uint128.h"  #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" -#ifdef _MSC_VER -#include <intrin.h> -#endif - -namespace Common { +namespace Common::X64 { -#ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { -    _mm_lfence(); -    _ReadWriteBarrier(); -    const u64 result = __rdtsc(); -    _mm_lfence(); -    _ReadWriteBarrier(); -    return result; -} -#else -static u64 FencedRDTSC() { -    u64 eax; -    u64 edx; -    asm volatile("lfence\n\t" -                 "rdtsc\n\t" -                 "lfence\n\t" -                 : "=a"(eax), "=d"(edx)); -    return (edx << 32) | eax; -} -#endif +NativeClock::NativeClock(u64 rdtsc_frequency_) +    : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, +      ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, +      us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, +      ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, +      cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, +      gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} -template <u64 Nearest> -static u64 RoundToNearest(u64 value) { -    const auto mod = value % Nearest; -    return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +std::chrono::nanoseconds NativeClock::GetTimeNS() const { +    return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)};  } -u64 EstimateRDTSCFrequency() { -    // Discard the first result measuring the rdtsc. -    FencedRDTSC(); -    std::this_thread::sleep_for(std::chrono::milliseconds{1}); -    FencedRDTSC(); - -    // Get the current time. -    const auto start_time = Common::RealTimeClock::Now(); -    const u64 tsc_start = FencedRDTSC(); -    // Wait for 250 milliseconds. -    std::this_thread::sleep_for(std::chrono::milliseconds{250}); -    const auto end_time = Common::RealTimeClock::Now(); -    const u64 tsc_end = FencedRDTSC(); -    // Calculate differences. -    const u64 timer_diff = static_cast<u64>( -        std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); -    const u64 tsc_diff = tsc_end - tsc_start; -    const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); -    return RoundToNearest<1000>(tsc_freq); +std::chrono::microseconds NativeClock::GetTimeUS() const { +    return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)};  } -namespace X64 { -NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, -                         u64 rtsc_frequency_) -    : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ -                                                                               rtsc_frequency_} { -    // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. -    time_sync_thread = std::jthread{[this](std::stop_token token) { -        // Get the current time. -        const auto start_time = Common::RealTimeClock::Now(); -        const u64 tsc_start = FencedRDTSC(); -        // Wait for 10 seconds. -        if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { -            return; -        } -        const auto end_time = Common::RealTimeClock::Now(); -        const u64 tsc_end = FencedRDTSC(); -        // Calculate differences. -        const u64 timer_diff = static_cast<u64>( -            std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); -        const u64 tsc_diff = tsc_end - tsc_start; -        const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); -        rtsc_frequency = tsc_freq; -        CalculateAndSetFactors(); -    }}; - -    time_point.inner.last_measure = FencedRDTSC(); -    time_point.inner.accumulated_ticks = 0U; -    CalculateAndSetFactors(); +std::chrono::milliseconds NativeClock::GetTimeMS() const { +    return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)};  } -u64 NativeClock::GetRTSC() { -    TimePoint new_time_point{}; -    TimePoint current_time_point{}; - -    current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); -    do { -        const u64 current_measure = FencedRDTSC(); -        u64 diff = current_measure - current_time_point.inner.last_measure; -        diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) -        new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure -                                                ? current_measure -                                                : current_time_point.inner.last_measure; -        new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; -    } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, -                                           current_time_point.pack, current_time_point.pack)); -    return new_time_point.inner.accumulated_ticks; +u64 NativeClock::GetCNTPCT() const { +    return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor);  } -void NativeClock::Pause(bool is_paused) { -    if (!is_paused) { -        TimePoint current_time_point{}; -        TimePoint new_time_point{}; - -        current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); -        do { -            new_time_point.pack = current_time_point.pack; -            new_time_point.inner.last_measure = FencedRDTSC(); -        } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, -                                               current_time_point.pack, current_time_point.pack)); -    } +u64 NativeClock::GetGPUTick() const { +    return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor);  } -std::chrono::nanoseconds NativeClock::GetTimeNS() { -    const u64 rtsc_value = GetRTSC(); -    return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; +u64 NativeClock::GetHostTicksNow() const { +    return FencedRDTSC();  } -std::chrono::microseconds NativeClock::GetTimeUS() { -    const u64 rtsc_value = GetRTSC(); -    return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; +u64 NativeClock::GetHostTicksElapsed() const { +    return FencedRDTSC() - start_ticks;  } -std::chrono::milliseconds NativeClock::GetTimeMS() { -    const u64 rtsc_value = GetRTSC(); -    return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; +bool NativeClock::IsNative() const { +    return true;  } -u64 NativeClock::GetClockCycles() { -    const u64 rtsc_value = GetRTSC(); -    return MultiplyHigh(rtsc_value, clock_rtsc_factor); -} - -u64 NativeClock::GetCPUCycles() { -    const u64 rtsc_value = GetRTSC(); -    return MultiplyHigh(rtsc_value, cpu_rtsc_factor); -} - -void NativeClock::CalculateAndSetFactors() { -    ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); -    us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); -    ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); -    clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); -    cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); -} - -} // namespace X64 - -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -3,58 +3,39 @@  #pragma once -#include "common/polyfill_thread.h"  #include "common/wall_clock.h" -namespace Common { +namespace Common::X64 { -namespace X64 {  class NativeClock final : public WallClock {  public: -    explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, -                         u64 rtsc_frequency_); +    explicit NativeClock(u64 rdtsc_frequency_); -    std::chrono::nanoseconds GetTimeNS() override; +    std::chrono::nanoseconds GetTimeNS() const override; -    std::chrono::microseconds GetTimeUS() override; +    std::chrono::microseconds GetTimeUS() const override; -    std::chrono::milliseconds GetTimeMS() override; +    std::chrono::milliseconds GetTimeMS() const override; -    u64 GetClockCycles() override; +    u64 GetCNTPCT() const override; -    u64 GetCPUCycles() override; +    u64 GetGPUTick() const override; -    void Pause(bool is_paused) override; +    u64 GetHostTicksNow() const override; -private: -    u64 GetRTSC(); - -    void CalculateAndSetFactors(); - -    union alignas(16) TimePoint { -        TimePoint() : pack{} {} -        u128 pack{}; -        struct Inner { -            u64 last_measure{}; -            u64 accumulated_ticks{}; -        } inner; -    }; - -    TimePoint time_point; +    u64 GetHostTicksElapsed() const override; -    // factors -    u64 clock_rtsc_factor{}; -    u64 cpu_rtsc_factor{}; -    u64 ns_rtsc_factor{}; -    u64 us_rtsc_factor{}; -    u64 ms_rtsc_factor{}; +    bool IsNative() const override; -    u64 rtsc_frequency; - -    std::jthread time_sync_thread; +private: +    u64 start_ticks; +    u64 rdtsc_frequency; + +    u64 ns_rdtsc_factor; +    u64 us_rdtsc_factor; +    u64 ms_rdtsc_factor; +    u64 cntpct_rdtsc_factor; +    u64 gputick_rdtsc_factor;  }; -} // namespace X64 - -u64 EstimateRDTSCFrequency(); -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <thread> + +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/x64/rdtsc.h" + +namespace Common::X64 { + +template <u64 Nearest> +static u64 RoundToNearest(u64 value) { +    const auto mod = value % Nearest; +    return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + +u64 EstimateRDTSCFrequency() { +    // Discard the first result measuring the rdtsc. +    FencedRDTSC(); +    std::this_thread::sleep_for(std::chrono::milliseconds{1}); +    FencedRDTSC(); + +    // Get the current time. +    const auto start_time = RealTimeClock::Now(); +    const u64 tsc_start = FencedRDTSC(); +    // Wait for 100 milliseconds. +    std::this_thread::sleep_for(std::chrono::milliseconds{100}); +    const auto end_time = RealTimeClock::Now(); +    const u64 tsc_end = FencedRDTSC(); +    // Calculate differences. +    const u64 timer_diff = static_cast<u64>( +        std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); +    const u64 tsc_diff = tsc_end - tsc_start; +    const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); +    return RoundToNearest<100'000>(tsc_freq); +} + +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include "common/common_types.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { +    _mm_lfence(); +    _ReadWriteBarrier(); +    const u64 result = __rdtsc(); +    _mm_lfence(); +    _ReadWriteBarrier(); +    return result; +} +#else +static inline u64 FencedRDTSC() { +    u64 eax; +    u64 edx; +    asm volatile("lfence\n\t" +                 "rdtsc\n\t" +                 "lfence\n\t" +                 : "=a"(eax), "=d"(edx)); +    return (edx << 32) | eax; +} +#endif + +u64 EstimateRDTSCFrequency(); + +} // namespace Common::X64 | 
