diff options
| author | liamwhite <liamwhite@users.noreply.github.com> | 2022-06-15 19:39:22 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-06-15 19:39:22 -0400 | 
| commit | 0ae4eae9a6f5ccb351ad1eb2ae35fb35d03661e1 (patch) | |
| tree | 94d07f494151a5701f48e147c75b9e4673d5b918 | |
| parent | 5ace5c1b7a6ce80f3c1186ca72c7ea6d8b18900e (diff) | |
| parent | 25429998e373c12287ae8da2a1c9c1bbe7bd7047 (diff) | |
Merge pull request #8460 from Morph1984/bounded-q
bounded_threadsafe_queue: Use constexpr capacity and mask
| -rw-r--r-- | src/common/bounded_threadsafe_queue.h | 159 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 2 | 
2 files changed, 74 insertions, 87 deletions
| diff --git a/src/common/bounded_threadsafe_queue.h b/src/common/bounded_threadsafe_queue.h index e83064c7f..7e465549b 100644 --- a/src/common/bounded_threadsafe_queue.h +++ b/src/common/bounded_threadsafe_queue.h @@ -1,10 +1,7 @@  // SPDX-FileCopyrightText: Copyright (c) 2020 Erik Rigtorp <erik@rigtorp.se>  // SPDX-License-Identifier: MIT +  #pragma once -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) -#endif  #include <atomic>  #include <bit> @@ -12,105 +9,63 @@  #include <memory>  #include <mutex>  #include <new> -#include <stdexcept>  #include <stop_token>  #include <type_traits>  #include <utility>  namespace Common { -namespace mpsc { +  #if defined(__cpp_lib_hardware_interference_size)  constexpr size_t hardware_interference_size = std::hardware_destructive_interference_size;  #else  constexpr size_t hardware_interference_size = 64;  #endif -template <typename T> -using AlignedAllocator = std::allocator<T>; - -template <typename T> -struct Slot { -    ~Slot() noexcept { -        if (turn.test()) { -            destroy(); -        } -    } - -    template <typename... Args> -    void construct(Args&&... args) noexcept { -        static_assert(std::is_nothrow_constructible_v<T, Args&&...>, -                      "T must be nothrow constructible with Args&&..."); -        std::construct_at(reinterpret_cast<T*>(&storage), std::forward<Args>(args)...); -    } - -    void destroy() noexcept { -        static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible"); -        std::destroy_at(reinterpret_cast<T*>(&storage)); -    } - -    T&& move() noexcept { -        return reinterpret_cast<T&&>(storage); -    } - -    // Align to avoid false sharing between adjacent slots -    alignas(hardware_interference_size) std::atomic_flag turn{}; -    struct aligned_store { -        struct type { -            alignas(T) unsigned char data[sizeof(T)]; -        }; -    }; -    typename aligned_store::type storage; -}; +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4324) +#endif -template <typename T, typename Allocator = AlignedAllocator<Slot<T>>> -class Queue { +template <typename T, size_t capacity = 0x400> +class MPSCQueue {  public: -    explicit Queue(const size_t capacity, const Allocator& allocator = Allocator()) -        : allocator_(allocator) { -        if (capacity < 1) { -            throw std::invalid_argument("capacity < 1"); -        } -        // Ensure that the queue length is an integer power of 2 -        // This is so that idx(i) can be a simple i & mask_ insted of i % capacity -        // https://github.com/rigtorp/MPMCQueue/pull/36 -        if (!std::has_single_bit(capacity)) { -            throw std::invalid_argument("capacity must be an integer power of 2"); -        } - -        mask_ = capacity - 1; - +    explicit MPSCQueue() : allocator{std::allocator<Slot<T>>()} {          // Allocate one extra slot to prevent false sharing on the last slot -        slots_ = allocator_.allocate(mask_ + 2); +        slots = allocator.allocate(capacity + 1);          // Allocators are not required to honor alignment for over-aligned types          // (see http://eel.is/c++draft/allocator.requirements#10) so we verify          // alignment here -        if (reinterpret_cast<uintptr_t>(slots_) % alignof(Slot<T>) != 0) { -            allocator_.deallocate(slots_, mask_ + 2); +        if (reinterpret_cast<uintptr_t>(slots) % alignof(Slot<T>) != 0) { +            allocator.deallocate(slots, capacity + 1);              throw std::bad_alloc();          } -        for (size_t i = 0; i < mask_ + 1; ++i) { -            std::construct_at(&slots_[i]); +        for (size_t i = 0; i < capacity; ++i) { +            std::construct_at(&slots[i]);          } +        static_assert(std::has_single_bit(capacity), "capacity must be an integer power of 2");          static_assert(alignof(Slot<T>) == hardware_interference_size,                        "Slot must be aligned to cache line boundary to prevent false sharing");          static_assert(sizeof(Slot<T>) % hardware_interference_size == 0,                        "Slot size must be a multiple of cache line size to prevent "                        "false sharing between adjacent slots"); -        static_assert(sizeof(Queue) % hardware_interference_size == 0, +        static_assert(sizeof(MPSCQueue) % hardware_interference_size == 0,                        "Queue size must be a multiple of cache line size to "                        "prevent false sharing between adjacent queues");      } -    ~Queue() noexcept { -        for (size_t i = 0; i < mask_ + 1; ++i) { -            slots_[i].~Slot(); +    ~MPSCQueue() noexcept { +        for (size_t i = 0; i < capacity; ++i) { +            std::destroy_at(&slots[i]);          } -        allocator_.deallocate(slots_, mask_ + 2); +        allocator.deallocate(slots, capacity + 1);      } -    // non-copyable and non-movable -    Queue(const Queue&) = delete; -    Queue& operator=(const Queue&) = delete; +    // The queue must be both non-copyable and non-movable +    MPSCQueue(const MPSCQueue&) = delete; +    MPSCQueue& operator=(const MPSCQueue&) = delete; + +    MPSCQueue(MPSCQueue&&) = delete; +    MPSCQueue& operator=(MPSCQueue&&) = delete;      void Push(const T& v) noexcept {          static_assert(std::is_nothrow_copy_constructible_v<T>, @@ -125,8 +80,8 @@ public:      void Pop(T& v, std::stop_token stop) noexcept {          auto const tail = tail_.fetch_add(1); -        auto& slot = slots_[idx(tail)]; -        if (false == slot.turn.test()) { +        auto& slot = slots[idx(tail)]; +        if (!slot.turn.test()) {              std::unique_lock lock{cv_mutex};              cv.wait(lock, stop, [&slot] { return slot.turn.test(); });          } @@ -137,12 +92,46 @@ public:      }  private: +    template <typename U = T> +    struct Slot { +        ~Slot() noexcept { +            if (turn.test()) { +                destroy(); +            } +        } + +        template <typename... Args> +        void construct(Args&&... args) noexcept { +            static_assert(std::is_nothrow_constructible_v<U, Args&&...>, +                          "T must be nothrow constructible with Args&&..."); +            std::construct_at(reinterpret_cast<U*>(&storage), std::forward<Args>(args)...); +        } + +        void destroy() noexcept { +            static_assert(std::is_nothrow_destructible_v<U>, "T must be nothrow destructible"); +            std::destroy_at(reinterpret_cast<U*>(&storage)); +        } + +        U&& move() noexcept { +            return reinterpret_cast<U&&>(storage); +        } + +        // Align to avoid false sharing between adjacent slots +        alignas(hardware_interference_size) std::atomic_flag turn{}; +        struct aligned_store { +            struct type { +                alignas(U) unsigned char data[sizeof(U)]; +            }; +        }; +        typename aligned_store::type storage; +    }; +      template <typename... Args>      void emplace(Args&&... args) noexcept {          static_assert(std::is_nothrow_constructible_v<T, Args&&...>,                        "T must be nothrow constructible with Args&&...");          auto const head = head_.fetch_add(1); -        auto& slot = slots_[idx(head)]; +        auto& slot = slots[idx(head)];          slot.turn.wait(true);          slot.construct(std::forward<Args>(args)...);          slot.turn.test_and_set(); @@ -150,31 +139,29 @@ private:      }      constexpr size_t idx(size_t i) const noexcept { -        return i & mask_; +        return i & mask;      } -    std::conditional_t<true, std::condition_variable_any, std::condition_variable> cv; -    std::mutex cv_mutex; -    size_t mask_; -    Slot<T>* slots_; -    [[no_unique_address]] Allocator allocator_; +    static constexpr size_t mask = capacity - 1;      // Align to avoid false sharing between head_ and tail_      alignas(hardware_interference_size) std::atomic<size_t> head_{0};      alignas(hardware_interference_size) std::atomic<size_t> tail_{0}; +    std::mutex cv_mutex; +    std::condition_variable_any cv; + +    Slot<T>* slots; +    [[no_unique_address]] std::allocator<Slot<T>> allocator; +      static_assert(std::is_nothrow_copy_assignable_v<T> || std::is_nothrow_move_assignable_v<T>,                    "T must be nothrow copy or move assignable");      static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible");  }; -} // namespace mpsc - -template <typename T, typename Allocator = mpsc::AlignedAllocator<mpsc::Slot<T>>> -using MPSCQueue = mpsc::Queue<T, Allocator>; - -} // namespace Common  #ifdef _MSC_VER  #pragma warning(pop)  #endif + +} // namespace Common diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index ad9fd5eff..be0ac2214 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -98,7 +98,7 @@ struct CommandDataContainer {  struct SynchState final {      using CommandQueue = Common::MPSCQueue<CommandDataContainer>;      std::mutex write_lock; -    CommandQueue queue{512}; // size must be 2^n +    CommandQueue queue;      u64 last_fence{};      std::atomic<u64> signaled_fence{};      std::condition_variable_any cv; | 
