diff options
| -rw-r--r-- | src/common/thread.cpp | 12 | ||||
| -rw-r--r-- | src/common/thread.h | 1 | ||||
| -rw-r--r-- | src/common/uint128.h | 5 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 5 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 6 | ||||
| -rw-r--r-- | src/core/core_timing.cpp | 146 | ||||
| -rw-r--r-- | src/core/core_timing.h | 27 | ||||
| -rw-r--r-- | src/tests/core/core_timing.cpp | 4 | 
8 files changed, 133 insertions, 73 deletions
| diff --git a/src/common/thread.cpp b/src/common/thread.cpp index f932a7290..919e33af9 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -47,6 +47,9 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {      case ThreadPriority::VeryHigh:          windows_priority = THREAD_PRIORITY_HIGHEST;          break; +    case ThreadPriority::Critical: +        windows_priority = THREAD_PRIORITY_TIME_CRITICAL; +        break;      default:          windows_priority = THREAD_PRIORITY_NORMAL;          break; @@ -59,9 +62,10 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {  void SetCurrentThreadPriority(ThreadPriority new_priority) {      pthread_t this_thread = pthread_self(); -    s32 max_prio = sched_get_priority_max(SCHED_OTHER); -    s32 min_prio = sched_get_priority_min(SCHED_OTHER); -    u32 level = static_cast<u32>(new_priority) + 1; +    const auto scheduling_type = SCHED_OTHER; +    s32 max_prio = sched_get_priority_max(scheduling_type); +    s32 min_prio = sched_get_priority_min(scheduling_type); +    u32 level = std::max(static_cast<u32>(new_priority) + 1, 4U);      struct sched_param params;      if (max_prio > min_prio) { @@ -70,7 +74,7 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {          params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4;      } -    pthread_setschedparam(this_thread, SCHED_OTHER, &params); +    pthread_setschedparam(this_thread, scheduling_type, &params);  }  #endif diff --git a/src/common/thread.h b/src/common/thread.h index a63122516..1552f58e0 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -92,6 +92,7 @@ enum class ThreadPriority : u32 {      Normal = 1,      High = 2,      VeryHigh = 3, +    Critical = 4,  };  void SetCurrentThreadPriority(ThreadPriority new_priority); diff --git a/src/common/uint128.h b/src/common/uint128.h index f890ffec2..199d0f55e 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -31,12 +31,17 @@ namespace Common {      
return _udiv128(r[1], r[0], d, &remainder);  #endif  #else +#ifdef __SIZEOF_INT128__ +    const auto product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b); +    return static_cast<u64>(product / d); +#else      const u64 diva = a / d;      const u64 moda = a % d;      const u64 divb = b / d;      const u64 modb = b % d;      return diva * b + moda * divb + moda * modb / d;  #endif +#endif  }  // This function multiplies 2 u64 values and produces a u128 value; diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 1b7194503..6aaa8cdf9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -75,8 +75,8 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen  }  u64 NativeClock::GetRTSC() { -    TimePoint new_time_point{};      TimePoint current_time_point{}; +    TimePoint new_time_point{};      current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());      do { @@ -89,8 +89,7 @@ u64 NativeClock::GetRTSC() {          new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;      } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,                                             current_time_point.pack, current_time_point.pack)); -    /// The clock cannot be more precise than the guest timer, remove the lower bits -    return new_time_point.inner.accumulated_ticks & inaccuracy_mask; +    return new_time_point.inner.accumulated_ticks;  }  void NativeClock::Pause(bool is_paused) { diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 30d2ba2e9..38ae7a462 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -37,12 +37,8 @@ private:          } inner;      }; -    /// value used to reduce the native clocks accuracy as some apss rely on -    /// undefined behavior where the level of accuracy in the clock shouldn't -    /// be 
higher. -    static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); -      TimePoint time_point; +      // factors      u64 clock_rtsc_factor{};      u64 cpu_rtsc_factor{}; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 29e7dba9b..140578069 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -6,7 +6,9 @@  #include <string>  #include <tuple> +#include "common/logging/log.h"  #include "common/microprofile.h" +#include "common/thread.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h"  #include "core/hardware_properties.h" @@ -41,11 +43,11 @@ CoreTiming::CoreTiming()  CoreTiming::~CoreTiming() = default; -void CoreTiming::ThreadEntry(CoreTiming& instance) { -    constexpr char name[] = "yuzu:HostTiming"; -    MicroProfileOnThreadCreate(name); -    Common::SetCurrentThreadName(name); -    Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); +void CoreTiming::ThreadEntry(CoreTiming& instance, size_t id) { +    const std::string name = "yuzu:HostTiming_" + std::to_string(id); +    MicroProfileOnThreadCreate(name.c_str()); +    Common::SetCurrentThreadName(name.c_str()); +    Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);      instance.on_thread_init();      instance.ThreadLoop();      MicroProfileOnThreadExit(); @@ -59,68 +61,97 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {      const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {};      ev_lost = CreateEvent("_lost_event", empty_timed_callback);      if (is_multicore) { -        timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); +        const auto hardware_concurrency = std::thread::hardware_concurrency(); +        size_t id = 0; +        worker_threads.emplace_back(ThreadEntry, std::ref(*this), id++); +        if (hardware_concurrency > 8) { +            worker_threads.emplace_back(ThreadEntry, std::ref(*this), id++); +        }      
}  }  void CoreTiming::Shutdown() { -    paused = true; +    is_paused = true;      shutting_down = true; -    pause_event.Set(); -    event.Set(); -    if (timer_thread) { -        timer_thread->join(); +    std::atomic_thread_fence(std::memory_order_release); + +    event_cv.notify_all(); +    wait_pause_cv.notify_all(); +    for (auto& thread : worker_threads) { +        thread.join();      } +    worker_threads.clear();      ClearPendingEvents(); -    timer_thread.reset();      has_started = false;  } -void CoreTiming::Pause(bool is_paused) { -    paused = is_paused; -    pause_event.Set(); +void CoreTiming::Pause(bool is_paused_) { +    std::unique_lock main_lock(event_mutex); +    if (is_paused_ == paused_state.load(std::memory_order_relaxed)) { +        return; +    } +    if (is_multicore) { +        is_paused = is_paused_; +        event_cv.notify_all(); +        if (!is_paused_) { +            wait_pause_cv.notify_all(); +        } +    } +    paused_state.store(is_paused_, std::memory_order_relaxed);  } -void CoreTiming::SyncPause(bool is_paused) { -    if (is_paused == paused && paused_set == paused) { +void CoreTiming::SyncPause(bool is_paused_) { +    std::unique_lock main_lock(event_mutex); +    if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {          return;      } -    Pause(is_paused); -    if (timer_thread) { -        if (!is_paused) { -            pause_event.Set(); + +    if (is_multicore) { +        is_paused = is_paused_; +        event_cv.notify_all(); +        if (!is_paused_) { +            wait_pause_cv.notify_all(); +        } +    } +    paused_state.store(is_paused_, std::memory_order_relaxed); +    if (is_multicore) { +        if (is_paused_) { +            wait_signal_cv.wait(main_lock, [this] { return pause_count == worker_threads.size(); }); +        } else { +            wait_signal_cv.wait(main_lock, [this] { return pause_count == 0; });          } -        event.Set(); -        while (paused_set != is_paused) - 
           ;      }  }  bool CoreTiming::IsRunning() const { -    return !paused_set; +    return !paused_state.load(std::memory_order_acquire);  }  bool CoreTiming::HasPendingEvents() const { -    return !(wait_set && event_queue.empty()); +    std::unique_lock main_lock(event_mutex); +    return !event_queue.empty() || pending_events.load(std::memory_order_relaxed) != 0;  }  void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future,                                 const std::shared_ptr<EventType>& event_type,                                 std::uintptr_t user_data) { -    { -        std::scoped_lock scope{basic_lock}; -        const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count()); -        event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type}); +    std::unique_lock main_lock(event_mutex); +    const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count()); + +    event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type}); +    pending_events.fetch_add(1, std::memory_order_relaxed); -        std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); +    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + +    if (is_multicore) { +        event_cv.notify_one();      } -    event.Set();  }  void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,                                   std::uintptr_t user_data) { -    std::scoped_lock scope{basic_lock}; +    std::unique_lock main_lock(event_mutex);      const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {          return e.type.lock().get() == event_type.get() && e.user_data == user_data;      }); @@ -129,6 +160,7 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,      if (itr != event_queue.end()) {          event_queue.erase(itr, event_queue.end());          
std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); +        pending_events.fetch_sub(1, std::memory_order_relaxed);      }  } @@ -168,11 +200,12 @@ u64 CoreTiming::GetClockTicks() const {  }  void CoreTiming::ClearPendingEvents() { +    std::unique_lock main_lock(event_mutex);      event_queue.clear();  }  void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { -    std::scoped_lock lock{basic_lock}; +    std::unique_lock main_lock(event_mutex);      const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {          return e.type.lock().get() == event_type.get(); @@ -186,21 +219,28 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {  }  std::optional<s64> CoreTiming::Advance() { -    std::scoped_lock lock{advance_lock, basic_lock};      global_timer = GetGlobalTimeNs().count(); +    std::unique_lock main_lock(event_mutex);      while (!event_queue.empty() && event_queue.front().time <= global_timer) {          Event evt = std::move(event_queue.front());          std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());          event_queue.pop_back(); -        basic_lock.unlock();          if (const auto event_type{evt.type.lock()}) { -            event_type->callback( -                evt.user_data, std::chrono::nanoseconds{static_cast<s64>(global_timer - evt.time)}); +            sequence_mutex.lock(); +            event_mutex.unlock(); + +            event_type->guard.lock(); +            sequence_mutex.unlock(); +            const s64 delay = static_cast<s64>(GetGlobalTimeNs().count() - evt.time); +            event_type->callback(evt.user_data, std::chrono::nanoseconds{delay}); +            event_type->guard.unlock(); + +            event_mutex.lock(); +            pending_events.fetch_sub(1, std::memory_order_relaxed);          } -        basic_lock.lock();          global_timer = GetGlobalTimeNs().count();      } @@ -213,26 +253,34 @@ 
std::optional<s64> CoreTiming::Advance() {  }  void CoreTiming::ThreadLoop() { +    const auto predicate = [this] { return !event_queue.empty() || is_paused; };      has_started = true;      while (!shutting_down) { -        while (!paused) { -            paused_set = false; +        while (!is_paused && !shutting_down) {              const auto next_time = Advance();              if (next_time) {                  if (*next_time > 0) {                      std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); -                    event.WaitFor(next_time_ns); +                    std::unique_lock main_lock(event_mutex); +                    event_cv.wait_for(main_lock, next_time_ns, predicate);                  }              } else { -                wait_set = true; -                event.Wait(); +                std::unique_lock main_lock(event_mutex); +                event_cv.wait(main_lock, predicate);              } -            wait_set = false;          } -        paused_set = true; -        clock->Pause(true); -        pause_event.Wait(); -        clock->Pause(false); +        std::unique_lock main_lock(event_mutex); +        pause_count++; +        if (pause_count == worker_threads.size()) { +            clock->Pause(true); +            wait_signal_cv.notify_all(); +        } +        wait_pause_cv.wait(main_lock, [this] { return !is_paused || shutting_down; }); +        pause_count--; +        if (pause_count == 0) { +            clock->Pause(false); +            wait_signal_cv.notify_all(); +        }      }  } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index d27773009..a86553e08 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -5,6 +5,7 @@  #include <atomic>  #include <chrono> +#include <condition_variable>  #include <functional>  #include <memory>  #include <mutex> @@ -14,7 +15,6 @@  #include <vector>  #include "common/common_types.h" -#include "common/thread.h"  #include 
"common/wall_clock.h"  namespace Core::Timing { @@ -32,6 +32,7 @@ struct EventType {      TimedCallback callback;      /// A pointer to the name of the event.      const std::string name; +    mutable std::mutex guard;  };  /** @@ -131,7 +132,7 @@ private:      /// Clear all pending events. This should ONLY be done on exit.      void ClearPendingEvents(); -    static void ThreadEntry(CoreTiming& instance); +    static void ThreadEntry(CoreTiming& instance, size_t id);      void ThreadLoop();      std::unique_ptr<Common::WallClock> clock; @@ -144,21 +145,25 @@ private:      // accomodated by the standard adaptor class.      std::vector<Event> event_queue;      u64 event_fifo_id = 0; +    std::atomic<size_t> pending_events{};      std::shared_ptr<EventType> ev_lost; -    Common::Event event{}; -    Common::Event pause_event{}; -    std::mutex basic_lock; -    std::mutex advance_lock; -    std::unique_ptr<std::thread> timer_thread; -    std::atomic<bool> paused{}; -    std::atomic<bool> paused_set{}; -    std::atomic<bool> wait_set{}; -    std::atomic<bool> shutting_down{};      std::atomic<bool> has_started{};      std::function<void()> on_thread_init{}; +    std::vector<std::thread> worker_threads; + +    std::condition_variable event_cv; +    std::condition_variable wait_pause_cv; +    std::condition_variable wait_signal_cv; +    mutable std::mutex event_mutex; +    mutable std::mutex sequence_mutex; + +    std::atomic<bool> paused_state{}; +    bool is_paused{}; +    bool shutting_down{};      bool is_multicore{}; +    size_t pause_count{};      /// Cycle timing      u64 ticks{}; diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 8358d36b5..e687416a8 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -8,6 +8,7 @@  #include <chrono>  #include <cstdlib>  #include <memory> +#include <mutex>  #include <string>  #include "core/core.h" @@ -21,13 +22,14 @@ std::array<s64, 5> delays{};  
std::bitset<CB_IDS.size()> callbacks_ran_flags;  u64 expected_callback = 0; +std::mutex control_mutex;  template <unsigned int IDX>  void HostCallbackTemplate(std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { +    std::unique_lock<std::mutex> lk(control_mutex);      static_assert(IDX < CB_IDS.size(), "IDX out of range");      callbacks_ran_flags.set(IDX);      REQUIRE(CB_IDS[IDX] == user_data); -    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);      delays[IDX] = ns_late.count();      ++expected_callback;  } | 
