diff options
Diffstat (limited to 'src')
177 files changed, 4802 insertions, 2999 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8777df751..61adbef28 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,10 +45,15 @@ if (MSVC) # Warnings /W3 + /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled + /we4101 # 'identifier': unreferenced local variable + /we4265 # 'class': class has virtual functions, but destructor is not virtual + /we4388 # signed/unsigned mismatch /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'? /we4555 # Expression has no effect; expected expression with side-effect /we4834 # Discarding return value of function with 'nodiscard' attribute + /we5038 # data member 'member1' will be initialized after data member 'member2' ) # /GS- - No stack buffer overflow checks diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2c2bd2ee8..5d781cd77 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -123,6 +123,7 @@ add_library(common STATIC hash.h hex_util.cpp hex_util.h + intrusive_red_black_tree.h logging/backend.cpp logging/backend.h logging/filter.cpp @@ -143,6 +144,7 @@ add_library(common STATIC page_table.h param_package.cpp param_package.h + parent_of_member.h quaternion.h ring_buffer.h scm_rev.cpp @@ -167,6 +169,7 @@ add_library(common STATIC time_zone.h timer.cpp timer.h + tree.h uint128.cpp uint128.h uuid.cpp diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 367b6bf6e..c90978f9c 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -93,6 +93,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void); return static_cast<T>(key) == 0; \ } +/// Evaluates a boolean expression, and returns a result unless that expression is true. +#define R_UNLESS(expr, res) \ + { \ + if (!(expr)) { \ + return res; \ + } \ + } + namespace Common { [[nodiscard]] constexpr u32 MakeMagic(char a, char b, char c, char d) { diff --git a/src/common/div_ceil.h b/src/common/div_ceil.h index 6b2c48f91..95e1489a9 100644 --- a/src/common/div_ceil.h +++ b/src/common/div_ceil.h @@ -11,16 +11,16 @@ namespace Common { /// Ceiled integer division. template <typename N, typename D> -requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeil( - N number, D divisor) { - return (static_cast<D>(number) + divisor - 1) / divisor; +requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeil(N number, + D divisor) { + return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor); } /// Ceiled integer division with logarithmic divisor in base 2 template <typename N, typename D> -requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeilLog2( +requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeilLog2( N value, D alignment_log2) { - return (static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2; + return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2); } } // namespace Common diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h new file mode 100644 index 000000000..fb55de94e --- /dev/null +++ b/src/common/intrusive_red_black_tree.h @@ -0,0 +1,627 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/parent_of_member.h" +#include "common/tree.h" + +namespace Common { + +namespace impl { + +class IntrusiveRedBlackTreeImpl; + +} + +struct IntrusiveRedBlackTreeNode { + +private: + RB_ENTRY(IntrusiveRedBlackTreeNode) entry{}; + + friend class impl::IntrusiveRedBlackTreeImpl; + + template <class, class, class> + friend class IntrusiveRedBlackTree; + +public: + constexpr IntrusiveRedBlackTreeNode() = default; +}; + +template <class T, class Traits, class Comparator> +class IntrusiveRedBlackTree; + +namespace impl { + +class IntrusiveRedBlackTreeImpl { + +private: + template <class, class, class> + friend class ::Common::IntrusiveRedBlackTree; + +private: + RB_HEAD(IntrusiveRedBlackTreeRoot, IntrusiveRedBlackTreeNode); + using RootType = IntrusiveRedBlackTreeRoot; + +private: + IntrusiveRedBlackTreeRoot root; + +public: + template <bool Const> + class Iterator; + + using value_type = IntrusiveRedBlackTreeNode; + using size_type = size_t; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = Iterator<false>; + using const_iterator = Iterator<true>; + + template <bool Const> + class Iterator { + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = typename IntrusiveRedBlackTreeImpl::value_type; + using difference_type = typename IntrusiveRedBlackTreeImpl::difference_type; + using pointer = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_pointer, + IntrusiveRedBlackTreeImpl::pointer>; + using reference = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_reference, + IntrusiveRedBlackTreeImpl::reference>; + + private: + pointer node; + + public: + explicit Iterator(pointer n) : node(n) {} + + bool operator==(const Iterator& rhs) const { + return this->node == rhs.node; + } + + bool operator!=(const Iterator& rhs) const { + return !(*this == rhs); + } + + pointer operator->() const { + return this->node; + } + + reference operator*() const { + return *this->node; + } + + Iterator& operator++() { + this->node = GetNext(this->node); + return *this; + } + + Iterator& operator--() { + this->node = GetPrev(this->node); + return *this; + } + + Iterator operator++(int) { + const Iterator it{*this}; + ++(*this); + return it; + } + + Iterator operator--(int) { + const Iterator it{*this}; + --(*this); + return it; + } + + operator Iterator<true>() const { + return Iterator<true>(this->node); + } + }; + +protected: + // Generate static implementations for non-comparison operations for IntrusiveRedBlackTreeRoot. + RB_GENERATE_WITHOUT_COMPARE_STATIC(IntrusiveRedBlackTreeRoot, IntrusiveRedBlackTreeNode, entry); + +private: + // Define accessors using RB_* functions. + constexpr void InitializeImpl() { + RB_INIT(&this->root); + } + + bool EmptyImpl() const { + return RB_EMPTY(&this->root); + } + + IntrusiveRedBlackTreeNode* GetMinImpl() const { + return RB_MIN(IntrusiveRedBlackTreeRoot, + const_cast<IntrusiveRedBlackTreeRoot*>(&this->root)); + } + + IntrusiveRedBlackTreeNode* GetMaxImpl() const { + return RB_MAX(IntrusiveRedBlackTreeRoot, + const_cast<IntrusiveRedBlackTreeRoot*>(&this->root)); + } + + IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) { + return RB_REMOVE(IntrusiveRedBlackTreeRoot, &this->root, node); + } + +public: + static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) { + return RB_NEXT(IntrusiveRedBlackTreeRoot, nullptr, node); + } + + static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) { + return RB_PREV(IntrusiveRedBlackTreeRoot, nullptr, node); + } + + static IntrusiveRedBlackTreeNode const* GetNext(const IntrusiveRedBlackTreeNode* node) { + return static_cast<const IntrusiveRedBlackTreeNode*>( + GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node))); + } + + static IntrusiveRedBlackTreeNode const* GetPrev(const IntrusiveRedBlackTreeNode* node) { + return static_cast<const IntrusiveRedBlackTreeNode*>( + GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node))); + } + +public: + constexpr IntrusiveRedBlackTreeImpl() : root() { + this->InitializeImpl(); + } + + // Iterator accessors. + iterator begin() { + return iterator(this->GetMinImpl()); + } + + const_iterator begin() const { + return const_iterator(this->GetMinImpl()); + } + + iterator end() { + return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr)); + } + + const_iterator end() const { + return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr)); + } + + const_iterator cbegin() const { + return this->begin(); + } + + const_iterator cend() const { + return this->end(); + } + + iterator iterator_to(reference ref) { + return iterator(&ref); + } + + const_iterator iterator_to(const_reference ref) const { + return const_iterator(&ref); + } + + // Content management. + bool empty() const { + return this->EmptyImpl(); + } + + reference back() { + return *this->GetMaxImpl(); + } + + const_reference back() const { + return *this->GetMaxImpl(); + } + + reference front() { + return *this->GetMinImpl(); + } + + const_reference front() const { + return *this->GetMinImpl(); + } + + iterator erase(iterator it) { + auto cur = std::addressof(*it); + auto next = GetNext(cur); + this->RemoveImpl(cur); + return iterator(next); + } +}; + +} // namespace impl + +template <typename T> +concept HasLightCompareType = requires { + { std::is_same<typename T::LightCompareType, void>::value } + ->std::convertible_to<bool>; +}; + +namespace impl { + +template <typename T, typename Default> +consteval auto* GetLightCompareType() { + if constexpr (HasLightCompareType<T>) { + return static_cast<typename T::LightCompareType*>(nullptr); + } else { + return static_cast<Default*>(nullptr); + } +} + +} // namespace impl + +template <typename T, typename Default> +using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>; + +template <class T, class Traits, class Comparator> +class IntrusiveRedBlackTree { + +public: + using ImplType = impl::IntrusiveRedBlackTreeImpl; + +private: + ImplType impl{}; + +public: + struct IntrusiveRedBlackTreeRootWithCompare : ImplType::IntrusiveRedBlackTreeRoot {}; + + template <bool Const> + class Iterator; + + using value_type = T; + using size_type = size_t; + using difference_type = ptrdiff_t; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using iterator = Iterator<false>; + using const_iterator = Iterator<true>; + + using light_value_type = LightCompareType<Comparator, value_type>; + using const_light_pointer = const light_value_type*; + using const_light_reference = const light_value_type&; + + template <bool Const> + class Iterator { + public: + friend class IntrusiveRedBlackTree<T, Traits, Comparator>; + + using ImplIterator = + std::conditional_t<Const, ImplType::const_iterator, ImplType::iterator>; + + using iterator_category = std::bidirectional_iterator_tag; + using value_type = typename IntrusiveRedBlackTree::value_type; + using difference_type = typename IntrusiveRedBlackTree::difference_type; + using pointer = std::conditional_t<Const, IntrusiveRedBlackTree::const_pointer, + IntrusiveRedBlackTree::pointer>; + using reference = std::conditional_t<Const, IntrusiveRedBlackTree::const_reference, + IntrusiveRedBlackTree::reference>; + + private: + ImplIterator iterator; + + private: + explicit Iterator(ImplIterator it) : iterator(it) {} + + explicit Iterator(typename std::conditional<Const, ImplType::const_iterator, + ImplType::iterator>::type::pointer ptr) + : iterator(ptr) {} + + ImplIterator GetImplIterator() const { + return this->iterator; + } + + public: + bool operator==(const Iterator& rhs) const { + return this->iterator == rhs.iterator; + } + + bool operator!=(const Iterator& rhs) const { + return !(*this == rhs); + } + + pointer operator->() const { + return Traits::GetParent(std::addressof(*this->iterator)); + } + + reference operator*() const { + return *Traits::GetParent(std::addressof(*this->iterator)); + } + + Iterator& operator++() { + ++this->iterator; + return *this; + } + + Iterator& operator--() { + --this->iterator; + return *this; + } + + Iterator operator++(int) { + const Iterator it{*this}; + ++this->iterator; + return it; + } + + Iterator operator--(int) { + const Iterator it{*this}; + --this->iterator; + return it; + } + + operator Iterator<true>() const { + return Iterator<true>(this->iterator); + } + }; + +private: + // Generate static implementations for comparison operations for IntrusiveRedBlackTreeRoot. + RB_GENERATE_WITH_COMPARE_STATIC(IntrusiveRedBlackTreeRootWithCompare, IntrusiveRedBlackTreeNode, + entry, CompareImpl, LightCompareImpl); + +private: + static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs, + const IntrusiveRedBlackTreeNode* rhs) { + return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs)); + } + + static int LightCompareImpl(const void* elm, const IntrusiveRedBlackTreeNode* rhs) { + return Comparator::Compare(*static_cast<const_light_pointer>(elm), *Traits::GetParent(rhs)); + } + + // Define accessors using RB_* functions. + IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) { + return RB_INSERT(IntrusiveRedBlackTreeRootWithCompare, + static_cast<IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root), + node); + } + + IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const { + return RB_FIND( + IntrusiveRedBlackTreeRootWithCompare, + const_cast<IntrusiveRedBlackTreeRootWithCompare*>( + static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)), + const_cast<IntrusiveRedBlackTreeNode*>(node)); + } + + IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const { + return RB_NFIND( + IntrusiveRedBlackTreeRootWithCompare, + const_cast<IntrusiveRedBlackTreeRootWithCompare*>( + static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)), + const_cast<IntrusiveRedBlackTreeNode*>(node)); + } + + IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const { + return RB_FIND_LIGHT( + IntrusiveRedBlackTreeRootWithCompare, + const_cast<IntrusiveRedBlackTreeRootWithCompare*>( + static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)), + static_cast<const void*>(lelm)); + } + + IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const { + return RB_NFIND_LIGHT( + IntrusiveRedBlackTreeRootWithCompare, + const_cast<IntrusiveRedBlackTreeRootWithCompare*>( + static_cast<const IntrusiveRedBlackTreeRootWithCompare*>(&this->impl.root)), + static_cast<const void*>(lelm)); + } + +public: + constexpr IntrusiveRedBlackTree() = default; + + // Iterator accessors. + iterator begin() { + return iterator(this->impl.begin()); + } + + const_iterator begin() const { + return const_iterator(this->impl.begin()); + } + + iterator end() { + return iterator(this->impl.end()); + } + + const_iterator end() const { + return const_iterator(this->impl.end()); + } + + const_iterator cbegin() const { + return this->begin(); + } + + const_iterator cend() const { + return this->end(); + } + + iterator iterator_to(reference ref) { + return iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); + } + + const_iterator iterator_to(const_reference ref) const { + return const_iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); + } + + // Content management. + bool empty() const { + return this->impl.empty(); + } + + reference back() { + return *Traits::GetParent(std::addressof(this->impl.back())); + } + + const_reference back() const { + return *Traits::GetParent(std::addressof(this->impl.back())); + } + + reference front() { + return *Traits::GetParent(std::addressof(this->impl.front())); + } + + const_reference front() const { + return *Traits::GetParent(std::addressof(this->impl.front())); + } + + iterator erase(iterator it) { + return iterator(this->impl.erase(it.GetImplIterator())); + } + + iterator insert(reference ref) { + ImplType::pointer node = Traits::GetNode(std::addressof(ref)); + this->InsertImpl(node); + return iterator(node); + } + + iterator find(const_reference ref) const { + return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref)))); + } + + iterator nfind(const_reference ref) const { + return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref)))); + } + + iterator find_light(const_light_reference ref) const { + return iterator(this->FindLightImpl(std::addressof(ref))); + } + + iterator nfind_light(const_light_reference ref) const { + return iterator(this->NFindLightImpl(std::addressof(ref))); + } +}; + +template <auto T, class Derived = impl::GetParentType<T>> +class IntrusiveRedBlackTreeMemberTraits; + +template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived> +class IntrusiveRedBlackTreeMemberTraits<Member, Derived> { +public: + template <class Comparator> + using TreeType = IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraits, Comparator>; + using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl; + +private: + template <class, class, class> + friend class IntrusiveRedBlackTree; + + friend class impl::IntrusiveRedBlackTreeImpl; + + static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) { + return std::addressof(parent->*Member); + } + + static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) { + return std::addressof(parent->*Member); + } + + static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { + return GetParentPointer<Member, Derived>(node); + } + + static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { + return GetParentPointer<Member, Derived>(node); + } + +private: + static constexpr TypedStorage<Derived> DerivedStorage = {}; + static_assert(GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage)); +}; + +template <auto T, class Derived = impl::GetParentType<T>> +class IntrusiveRedBlackTreeMemberTraitsDeferredAssert; + +template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived> +class IntrusiveRedBlackTreeMemberTraitsDeferredAssert<Member, Derived> { +public: + template <class Comparator> + using TreeType = + IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>; + using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl; + + static constexpr bool IsValid() { + TypedStorage<Derived> DerivedStorage = {}; + return GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage); + } + +private: + template <class, class, class> + friend class IntrusiveRedBlackTree; + + friend class impl::IntrusiveRedBlackTreeImpl; + + static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) { + return std::addressof(parent->*Member); + } + + static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) { + return std::addressof(parent->*Member); + } + + static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { + return GetParentPointer<Member, Derived>(node); + } + + static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { + return GetParentPointer<Member, Derived>(node); + } +}; + +template <class Derived> +class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode { +public: + constexpr Derived* GetPrev() { + return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this)); + } + constexpr const Derived* GetPrev() const { + return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this)); + } + + constexpr Derived* GetNext() { + return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this)); + } + constexpr const Derived* GetNext() const { + return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this)); + } +}; + +template <class Derived> +class IntrusiveRedBlackTreeBaseTraits { +public: + template <class Comparator> + using TreeType = IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeBaseTraits, Comparator>; + using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl; + +private: + template <class, class, class> + friend class IntrusiveRedBlackTree; + + friend class impl::IntrusiveRedBlackTreeImpl; + + static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) { + return static_cast<IntrusiveRedBlackTreeNode*>(parent); + } + + static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) { + return static_cast<const IntrusiveRedBlackTreeNode*>(parent); + } + + static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { + return static_cast<Derived*>(node); + } + + static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { + return static_cast<const Derived*>(node); + } +}; + +} // namespace Common diff --git a/src/common/parent_of_member.h b/src/common/parent_of_member.h new file mode 100644 index 000000000..d9a14529d --- /dev/null +++ b/src/common/parent_of_member.h @@ -0,0 +1,191 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> + +#include "common/assert.h" +#include "common/common_types.h" + +namespace Common { +namespace detail { +template <typename T, size_t Size, size_t Align> +struct TypedStorageImpl { + std::aligned_storage_t<Size, Align> storage_; +}; +} // namespace detail + +template <typename T> +using TypedStorage = detail::TypedStorageImpl<T, sizeof(T), alignof(T)>; + +template <typename T> +static constexpr T* GetPointer(TypedStorage<T>& ts) { + return static_cast<T*>(static_cast<void*>(std::addressof(ts.storage_))); +} + +template <typename T> +static constexpr const T* GetPointer(const TypedStorage<T>& ts) { + return static_cast<const T*>(static_cast<const void*>(std::addressof(ts.storage_))); +} + +namespace impl { + +template <size_t MaxDepth> +struct OffsetOfUnionHolder { + template <typename ParentType, typename MemberType, size_t Offset> + union UnionImpl { + using PaddingMember = char; + static constexpr size_t GetOffset() { + return Offset; + } + +#pragma pack(push, 1) + struct { + PaddingMember padding[Offset]; + MemberType members[(sizeof(ParentType) / sizeof(MemberType)) + 1]; + } data; +#pragma pack(pop) + UnionImpl<ParentType, MemberType, Offset + 1> next_union; + }; + + template <typename ParentType, typename MemberType> + union UnionImpl<ParentType, MemberType, 0> { + static constexpr size_t GetOffset() { + return 0; + } + + struct { + MemberType members[(sizeof(ParentType) / sizeof(MemberType)) + 1]; + } data; + UnionImpl<ParentType, MemberType, 1> next_union; + }; + + template <typename ParentType, typename MemberType> + union UnionImpl<ParentType, MemberType, MaxDepth> {}; +}; + +template <typename ParentType, typename MemberType> +struct OffsetOfCalculator { + using UnionHolder = + typename OffsetOfUnionHolder<sizeof(MemberType)>::template UnionImpl<ParentType, MemberType, + 0>; + union Union { + char c{}; + UnionHolder first_union; + TypedStorage<ParentType> parent; + + constexpr Union() : c() {} + }; + static constexpr Union U = {}; + + static constexpr const MemberType* GetNextAddress(const MemberType* start, + const MemberType* target) { + while (start < target) { + start++; + } + return start; + } + + static constexpr std::ptrdiff_t GetDifference(const MemberType* start, + const MemberType* target) { + return (target - start) * sizeof(MemberType); + } + + template <typename CurUnion> + static constexpr std::ptrdiff_t OffsetOfImpl(MemberType ParentType::*member, + CurUnion& cur_union) { + constexpr size_t Offset = CurUnion::GetOffset(); + const auto target = std::addressof(GetPointer(U.parent)->*member); + const auto start = std::addressof(cur_union.data.members[0]); + const auto next = GetNextAddress(start, target); + + if (next != target) { + if constexpr (Offset < sizeof(MemberType) - 1) { + return OffsetOfImpl(member, cur_union.next_union); + } else { + UNREACHABLE(); + } + } + + return (next - start) * sizeof(MemberType) + Offset; + } + + static constexpr std::ptrdiff_t OffsetOf(MemberType ParentType::*member) { + return OffsetOfImpl(member, U.first_union); + } +}; + +template <typename T> +struct GetMemberPointerTraits; + +template <typename P, typename M> +struct GetMemberPointerTraits<M P::*> { + using Parent = P; + using Member = M; +}; + +template <auto MemberPtr> +using GetParentType = typename GetMemberPointerTraits<decltype(MemberPtr)>::Parent; + +template <auto MemberPtr> +using GetMemberType = typename GetMemberPointerTraits<decltype(MemberPtr)>::Member; + +template <auto MemberPtr, typename RealParentType = GetParentType<MemberPtr>> +static inline std::ptrdiff_t OffsetOf = [] { + using DeducedParentType = GetParentType<MemberPtr>; + using MemberType = GetMemberType<MemberPtr>; + static_assert(std::is_base_of<DeducedParentType, RealParentType>::value || + std::is_same<RealParentType, DeducedParentType>::value); + + return OffsetOfCalculator<RealParentType, MemberType>::OffsetOf(MemberPtr); +}(); + +} // namespace impl + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType& GetParentReference(impl::GetMemberType<MemberPtr>* member) { + std::ptrdiff_t Offset = impl::OffsetOf<MemberPtr, RealParentType>; + return *static_cast<RealParentType*>( + static_cast<void*>(static_cast<uint8_t*>(static_cast<void*>(member)) - Offset)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType const& GetParentReference(impl::GetMemberType<MemberPtr> const* member) { + std::ptrdiff_t Offset = impl::OffsetOf<MemberPtr, RealParentType>; + return *static_cast<const RealParentType*>(static_cast<const void*>( + static_cast<const uint8_t*>(static_cast<const void*>(member)) - Offset)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType* GetParentPointer(impl::GetMemberType<MemberPtr>* member) { + return std::addressof(GetParentReference<MemberPtr, RealParentType>(member)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType const* GetParentPointer(impl::GetMemberType<MemberPtr> const* member) { + return std::addressof(GetParentReference<MemberPtr, RealParentType>(member)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType& GetParentReference(impl::GetMemberType<MemberPtr>& member) { + return GetParentReference<MemberPtr, RealParentType>(std::addressof(member)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType const& GetParentReference(impl::GetMemberType<MemberPtr> const& member) { + return GetParentReference<MemberPtr, RealParentType>(std::addressof(member)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType* GetParentPointer(impl::GetMemberType<MemberPtr>& member) { + return std::addressof(GetParentReference<MemberPtr, RealParentType>(member)); +} + +template <auto MemberPtr, typename RealParentType = impl::GetParentType<MemberPtr>> +constexpr RealParentType const* GetParentPointer(impl::GetMemberType<MemberPtr> const& member) { + return std::addressof(GetParentReference<MemberPtr, RealParentType>(member)); +} + +} // namespace Common diff --git a/src/common/tree.h b/src/common/tree.h new file mode 100644 index 000000000..a6b636646 --- /dev/null +++ b/src/common/tree.h @@ -0,0 +1,822 @@ +/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ +/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ +/* $FreeBSD$ */ + +/*- + * Copyright 2002 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_TREE_H_ +#define _SYS_TREE_H_ + +/* FreeBSD <sys/cdefs.h> has a lot of defines we don't really want. */ +/* tree.h only actually uses __inline and __unused, so we'll just define those. */ + +/* #include <sys/cdefs.h> */ + +#ifndef __inline +#define __inline inline +#endif + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). + */ + +#define SPLAY_HEAD(name, type) \ + struct name { \ + struct type* sph_root; /* root of the tree */ \ + } + +#define SPLAY_INITIALIZER(root) \ + { NULL } + +#define SPLAY_INIT(root) \ + do { \ + (root)->sph_root = NULL; \ + } while (/*CONSTCOND*/ 0) + +#define SPLAY_ENTRY(type) \ + struct { \ + struct type* spe_left; /* left element */ \ + struct type* spe_right; /* right element */ \ + } + +#define SPLAY_LEFT(elm, field) (elm)->field.spe_left +#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right +#define SPLAY_ROOT(head) (head)->sph_root +#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) + +/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ +#define SPLAY_ROTATE_RIGHT(head, tmp, field) \ + do { \ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ + } while (/*CONSTCOND*/ 0) + +#define SPLAY_ROTATE_LEFT(head, tmp, field) \ + do { \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ + } while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKLEFT(head, tmp, field) \ + do { \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ + } while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKRIGHT(head, tmp, field) \ + do { \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ + } while (/*CONSTCOND*/ 0) + +#define SPLAY_ASSEMBLE(head, node, left, right, field) \ + do { \ + SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ + SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field); \ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ + } while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ + +#define SPLAY_PROTOTYPE(name, type, field, cmp) \ + void name##_SPLAY(struct name*, struct type*); \ + void name##_SPLAY_MINMAX(struct name*, int); \ + struct type* name##_SPLAY_INSERT(struct name*, struct type*); \ + struct type* name##_SPLAY_REMOVE(struct name*, struct type*); \ + \ + /* Finds the node with the same key as elm */ \ + static __inline struct type* name##_SPLAY_FIND(struct name* head, struct type* elm) { \ + if (SPLAY_EMPTY(head)) \ + return (NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) \ + return (head->sph_root); \ + return (NULL); \ + } \ + \ + static __inline struct type* name##_SPLAY_NEXT(struct name* head, struct type* elm) { \ + name##_SPLAY(head, elm); \ + if (SPLAY_RIGHT(elm, field) != NULL) { \ + elm = SPLAY_RIGHT(elm, field); \ + while (SPLAY_LEFT(elm, field) != NULL) { \ + elm = SPLAY_LEFT(elm, field); \ + } \ + } else \ + elm = NULL; \ + return (elm); \ + } \ + \ + static __inline struct type* name##_SPLAY_MIN_MAX(struct name* head, int val) { \ + name##_SPLAY_MINMAX(head, val); \ + return (SPLAY_ROOT(head)); \ + } + +/* Main splay operation. + * Moves node close to the key of elm to top + */ +#define SPLAY_GENERATE(name, type, field, cmp) \ + struct type* name##_SPLAY_INSERT(struct name* head, struct type* elm) { \ + if (SPLAY_EMPTY(head)) { \ + SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ + } else { \ + int __comp; \ + name##_SPLAY(head, elm); \ + __comp = (cmp)(elm, (head)->sph_root); \ + if (__comp < 0) { \ + SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field); \ + SPLAY_RIGHT(elm, field) = (head)->sph_root; \ + SPLAY_LEFT((head)->sph_root, field) = NULL; \ + } else if (__comp > 0) { \ + SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field); \ + SPLAY_LEFT(elm, field) = (head)->sph_root; \ + SPLAY_RIGHT((head)->sph_root, field) = NULL; \ + } else \ + return ((head)->sph_root); \ + } \ + (head)->sph_root = (elm); \ + return (NULL); \ + } \ + \ + struct type* name##_SPLAY_REMOVE(struct name* head, struct type* elm) { \ + struct type* __tmp; \ + if (SPLAY_EMPTY(head)) \ + return (NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) { \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ + } else { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ + name##_SPLAY(head, elm); \ + SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ + } \ + return (elm); \ + } \ + return (NULL); \ + } \ + \ + void name##_SPLAY(struct name* head, struct type* elm) { \ + struct type __node, *__left, *__right, *__tmp; \ + int __comp; \ + \ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL; \ + __left = __right = &__node; \ + \ + while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) < 0) { \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) \ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) > 0) { \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL) \ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ + } \ + \ + /* Splay with either the minimum or the maximum element \ + * Used to find minimum or maximum element in tree. \ + */ \ + void name##_SPLAY_MINMAX(struct name* head, int __comp) { \ + struct type __node, *__left, *__right, *__tmp; \ + \ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL; \ + __left = __right = &__node; \ + \ + while (1) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp < 0) { \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) \ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp > 0) { \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL) \ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ + } + +#define SPLAY_NEGINF -1 +#define SPLAY_INF 1 + +#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) +#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) +#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) +#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) +#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) +#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) + +#define SPLAY_FOREACH(x, name, head) \ + for ((x) = SPLAY_MIN(name, head); (x) != NULL; (x) = SPLAY_NEXT(name, head, x)) + +/* Macros that define a red-black tree */ +#define RB_HEAD(name, type) \ + struct name { \ + struct type* rbh_root; /* root of the tree */ \ + } + +#define RB_INITIALIZER(root) \ + { NULL } + +#define RB_INIT(root) \ + do { \ + (root)->rbh_root = NULL; \ + } while (/*CONSTCOND*/ 0) + +#define RB_BLACK 0 +#define RB_RED 1 +#define RB_ENTRY(type) \ + struct { \ + struct type* rbe_left; /* left element */ \ + struct type* rbe_right; /* right element */ \ + struct type* rbe_parent; /* parent element */ \ + int rbe_color; /* node color */ \ + } + +#define RB_LEFT(elm, field) (elm)->field.rbe_left +#define RB_RIGHT(elm, field) (elm)->field.rbe_right +#define RB_PARENT(elm, field) (elm)->field.rbe_parent +#define RB_COLOR(elm, field) (elm)->field.rbe_color +#define RB_ROOT(head) (head)->rbh_root +#define RB_EMPTY(head) (RB_ROOT(head) == NULL) + +#define RB_SET(elm, parent, field) \ + do { \ + RB_PARENT(elm, field) = parent; \ + RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ + RB_COLOR(elm, field) = RB_RED; \ + } while (/*CONSTCOND*/ 0) + +#define RB_SET_BLACKRED(black, red, field) \ + do { \ + RB_COLOR(black, field) = RB_BLACK; \ + RB_COLOR(red, field) = RB_RED; \ + } while (/*CONSTCOND*/ 0) + +#ifndef RB_AUGMENT +#define RB_AUGMENT(x) \ + do { \ + } while (0) +#endif + +#define RB_ROTATE_LEFT(head, elm, tmp, field) \ + do { \ + (tmp) = RB_RIGHT(elm, field); \ + if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \ + RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_LEFT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ + RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ + } while (/*CONSTCOND*/ 0) + +#define RB_ROTATE_RIGHT(head, elm, tmp, field) \ + do { \ + (tmp) = RB_LEFT(elm, field); \ + if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \ + RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_RIGHT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ + RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ + } while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ +#define RB_PROTOTYPE(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, ) +#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static) +#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ + RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \ + RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \ + RB_PROTOTYPE_INSERT(name, type, attr); \ + RB_PROTOTYPE_REMOVE(name, type, attr); \ + RB_PROTOTYPE_FIND(name, type, attr); \ + RB_PROTOTYPE_NFIND(name, type, attr); \ + RB_PROTOTYPE_FIND_LIGHT(name, type, attr); \ + RB_PROTOTYPE_NFIND_LIGHT(name, type, attr); \ + RB_PROTOTYPE_NEXT(name, type, attr); \ + RB_PROTOTYPE_PREV(name, type, attr); \ + RB_PROTOTYPE_MINMAX(name, type, attr); +#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \ + attr void name##_RB_INSERT_COLOR(struct name*, struct type*) +#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \ + attr void name##_RB_REMOVE_COLOR(struct name*, struct type*, struct type*) +#define RB_PROTOTYPE_REMOVE(name, type, attr) \ + attr struct type* name##_RB_REMOVE(struct name*, struct type*) +#define RB_PROTOTYPE_INSERT(name, type, attr) \ + attr struct type* name##_RB_INSERT(struct name*, struct type*) +#define RB_PROTOTYPE_FIND(name, type, attr) \ + attr struct type* name##_RB_FIND(struct name*, struct type*) +#define RB_PROTOTYPE_NFIND(name, type, attr) \ + attr struct type* name##_RB_NFIND(struct name*, struct type*) +#define RB_PROTOTYPE_FIND_LIGHT(name, type, attr) \ + attr struct type* name##_RB_FIND_LIGHT(struct name*, const void*) +#define RB_PROTOTYPE_NFIND_LIGHT(name, type, attr) \ + attr struct type* name##_RB_NFIND_LIGHT(struct name*, const void*) +#define RB_PROTOTYPE_NEXT(name, type, attr) attr struct type* name##_RB_NEXT(struct type*) +#define RB_PROTOTYPE_PREV(name, type, attr) attr struct type* name##_RB_PREV(struct type*) +#define RB_PROTOTYPE_MINMAX(name, type, attr) attr struct type* name##_RB_MINMAX(struct name*, int) + +/* Main rb operation. + * Moves node close to the key of elm to top + */ +#define RB_GENERATE_WITHOUT_COMPARE(name, type, field) \ + RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, ) +#define RB_GENERATE_WITHOUT_COMPARE_STATIC(name, type, field) \ + RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, static) +#define RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, attr) \ + RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ + RB_GENERATE_REMOVE(name, type, field, attr) \ + RB_GENERATE_NEXT(name, type, field, attr) \ + RB_GENERATE_PREV(name, type, field, attr) \ + RB_GENERATE_MINMAX(name, type, field, attr) + +#define RB_GENERATE_WITH_COMPARE(name, type, field, cmp, lcmp) \ + RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, lcmp, ) +#define RB_GENERATE_WITH_COMPARE_STATIC(name, type, field, cmp, lcmp) \ + RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, lcmp, static) +#define RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, lcmp, attr) \ + RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ + RB_GENERATE_INSERT(name, type, field, cmp, attr) \ + RB_GENERATE_FIND(name, type, field, cmp, attr) \ + RB_GENERATE_NFIND(name, type, field, cmp, attr) \ + RB_GENERATE_FIND_LIGHT(name, type, field, lcmp, attr) \ + RB_GENERATE_NFIND_LIGHT(name, type, field, lcmp, attr) + +#define RB_GENERATE_ALL(name, type, field, cmp) RB_GENERATE_ALL_INTERNAL(name, type, field, cmp, ) +#define RB_GENERATE_ALL_STATIC(name, type, field, cmp) \ + RB_GENERATE_ALL_INTERNAL(name, type, field, cmp, static) +#define RB_GENERATE_ALL_INTERNAL(name, type, field, cmp, attr) \ + RB_GENERATE_WITHOUT_COMPARE_INTERNAL(name, type, field, attr) \ + RB_GENERATE_WITH_COMPARE_INTERNAL(name, type, field, cmp, attr) + +#define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ + attr void name##_RB_INSERT_COLOR(struct name* head, struct type* elm) { \ + struct type *parent, *gparent, *tmp; \ + while ((parent = RB_PARENT(elm, field)) != NULL && RB_COLOR(parent, field) == RB_RED) { \ + gparent = RB_PARENT(parent, field); \ + if (parent == RB_LEFT(gparent, field)) { \ + tmp = RB_RIGHT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field); \ + elm = gparent; \ + continue; \ + } \ + if (RB_RIGHT(parent, field) == elm) { \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \ + } else { \ + tmp = RB_LEFT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field); \ + elm = gparent; \ + continue; \ + } \ + if (RB_LEFT(parent, field) == elm) { \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_LEFT(head, gparent, tmp, field); \ + } \ + } \ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \ + } + +#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ + attr void name##_RB_REMOVE_COLOR(struct name* head, struct type* parent, struct type* elm) { \ + struct type* tmp; \ + while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && elm != RB_ROOT(head)) { \ + if (RB_LEFT(parent, field) == elm) { \ + tmp = RB_RIGHT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + tmp = RB_RIGHT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) && \ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) { \ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) { \ + struct type* oleft; \ + if ((oleft = RB_LEFT(tmp, field)) != NULL) \ + RB_COLOR(oleft, field) = RB_BLACK; \ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_RIGHT(head, tmp, oleft, field); \ + tmp = RB_RIGHT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field); \ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_RIGHT(tmp, field)) \ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK; \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + elm = RB_ROOT(head); \ + break; \ + } \ + } else { \ + tmp = RB_LEFT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + tmp = RB_LEFT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) && \ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) { \ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) { \ + struct type* oright; \ + if ((oright = RB_RIGHT(tmp, field)) != NULL) \ + RB_COLOR(oright, field) = RB_BLACK; \ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_LEFT(head, tmp, oright, field); \ + tmp = RB_LEFT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field); \ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_LEFT(tmp, field)) \ + RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK; \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + } \ + if (elm) \ + RB_COLOR(elm, field) = RB_BLACK; \ + } + +#define RB_GENERATE_REMOVE(name, type, field, attr) \ + attr struct type* name##_RB_REMOVE(struct name* head, struct type* elm) { \ + struct type *child, *parent, *old = elm; \ + int color; \ + if (RB_LEFT(elm, field) == NULL) \ + child = RB_RIGHT(elm, field); \ + else if (RB_RIGHT(elm, field) == NULL) \ + child = RB_LEFT(elm, field); \ + else { \ + struct type* left; \ + elm = RB_RIGHT(elm, field); \ + while ((left = RB_LEFT(elm, field)) != NULL) \ + elm = left; \ + child = RB_RIGHT(elm, field); \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ + if (RB_PARENT(elm, field) == old) \ + parent = elm; \ + (elm)->field = (old)->field; \ + if (RB_PARENT(old, field)) { \ + if (RB_LEFT(RB_PARENT(old, field), field) == old) \ + RB_LEFT(RB_PARENT(old, field), field) = elm; \ + else \ + RB_RIGHT(RB_PARENT(old, field), field) = elm; \ + RB_AUGMENT(RB_PARENT(old, field)); \ + } else \ + RB_ROOT(head) = elm; \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + if (RB_RIGHT(old, field)) \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + if (parent) { \ + left = parent; \ + do { \ + RB_AUGMENT(left); \ + } while ((left = RB_PARENT(left, field)) != NULL); \ + } \ + goto color; \ + } \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ + color: \ + if (color == RB_BLACK) \ + name##_RB_REMOVE_COLOR(head, parent, child); \ + return (old); \ + } + +#define RB_GENERATE_INSERT(name, type, field, cmp, attr) \ + /* Inserts a node into the RB tree */ \ + attr struct type* name##_RB_INSERT(struct name* head, struct type* elm) { \ + struct type* tmp; \ + struct type* parent = NULL; \ + int comp = 0; \ + tmp = RB_ROOT(head); \ + while (tmp) { \ + parent = tmp; \ + comp = (cmp)(elm, parent); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + RB_SET(elm, parent, field); \ + if (parent != NULL) { \ + if (comp < 0) \ + RB_LEFT(parent, field) = elm; \ + else \ + RB_RIGHT(parent, field) = elm; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = elm; \ + name##_RB_INSERT_COLOR(head, elm); \ + return (NULL); \ + } + +#define RB_GENERATE_FIND(name, type, field, cmp, attr) \ + /* Finds the node with the same key as elm */ \ + attr struct type* name##_RB_FIND(struct name* head, struct type* elm) { \ + struct type* tmp = RB_ROOT(head); \ + int comp; \ + while (tmp) { \ + comp = cmp(elm, tmp); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ + } + +#define RB_GENERATE_NFIND(name, type, field, cmp, attr) \ + /* Finds the first node greater than or equal to the search key */ \ + attr struct type* name##_RB_NFIND(struct name* head, struct type* elm) { \ + struct type* tmp = RB_ROOT(head); \ + struct type* res = NULL; \ + int comp; \ + while (tmp) { \ + comp = cmp(elm, tmp); \ + if (comp < 0) { \ + res = tmp; \ + tmp = RB_LEFT(tmp, field); \ + } else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (res); \ + } + +#define RB_GENERATE_FIND_LIGHT(name, type, field, lcmp, attr) \ + /* Finds the node with the same key as elm */ \ + attr struct type* name##_RB_FIND_LIGHT(struct name* head, const void* lelm) { \ + struct type* tmp = RB_ROOT(head); \ + int comp; \ + while (tmp) { \ + comp = lcmp(lelm, tmp); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ + } + +#define RB_GENERATE_NFIND_LIGHT(name, type, field, lcmp, attr) \ + /* Finds the first node greater than or equal to the search key */ \ + attr struct type* name##_RB_NFIND_LIGHT(struct name* head, const void* lelm) { \ + struct type* tmp = RB_ROOT(head); \ + struct type* res = NULL; \ + int comp; \ + while (tmp) { \ + comp = lcmp(lelm, tmp); \ + if (comp < 0) { \ + res = tmp; \ + tmp = RB_LEFT(tmp, field); \ + } else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (res); \ + } + +#define RB_GENERATE_NEXT(name, type, field, attr) \ + /* ARGSUSED */ \ + attr struct type* name##_RB_NEXT(struct type* elm) { \ + if (RB_RIGHT(elm, field)) { \ + elm = RB_RIGHT(elm, field); \ + while (RB_LEFT(elm, field)) \ + elm = RB_LEFT(elm, field); \ + } else { \ + if (RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else { \ + while (RB_PARENT(elm, field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ + } + +#define RB_GENERATE_PREV(name, type, field, attr) \ + /* ARGSUSED */ \ + attr struct type* name##_RB_PREV(struct type* elm) { \ + if (RB_LEFT(elm, field)) { \ + elm = RB_LEFT(elm, field); \ + while (RB_RIGHT(elm, field)) \ + elm = RB_RIGHT(elm, field); \ + } else { \ + if (RB_PARENT(elm, field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else { \ + while (RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ + } + +#define RB_GENERATE_MINMAX(name, type, field, attr) \ + attr struct type* name##_RB_MINMAX(struct name* head, int val) { \ + struct type* tmp = RB_ROOT(head); \ + struct type* parent = NULL; \ + while (tmp) { \ + parent = tmp; \ + if (val < 0) \ + tmp = RB_LEFT(tmp, field); \ + else \ + tmp = RB_RIGHT(tmp, field); \ + } \ + return (parent); \ + } + +#define RB_NEGINF -1 +#define RB_INF 1 + +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) +#define RB_FIND_LIGHT(name, x, y) name##_RB_FIND_LIGHT(x, y) +#define RB_NFIND_LIGHT(name, x, y) name##_RB_NFIND_LIGHT(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(y) +#define RB_PREV(name, x, y) name##_RB_PREV(y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) + +#define RB_FOREACH(x, name, head) \ + for ((x) = RB_MIN(name, head); (x) != NULL; (x) = name##_RB_NEXT(x)) + +#define RB_FOREACH_FROM(x, name, y) \ + for ((x) = (y); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_SAFE(x, name, head, y) \ + for ((x) = RB_MIN(name, head); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ + (x) = (y)) + +#define RB_FOREACH_REVERSE(x, name, head) \ + for ((x) = RB_MAX(name, head); (x) != NULL; (x) = name##_RB_PREV(x)) + +#define RB_FOREACH_REVERSE_FROM(x, name, y) \ + for ((x) = (y); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ + for ((x) = RB_MAX(name, head); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ + (x) = (y)) + +#endif /* _SYS_TREE_H_ */ diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 01f3e9419..1b8ad476e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -142,8 +142,6 @@ add_library(core STATIC hardware_interrupt_manager.h hle/ipc.h hle/ipc_helpers.h - hle/kernel/address_arbiter.cpp - hle/kernel/address_arbiter.h hle/kernel/client_port.cpp hle/kernel/client_port.h hle/kernel/client_session.cpp @@ -157,13 +155,19 @@ add_library(core STATIC hle/kernel/handle_table.h hle/kernel/hle_ipc.cpp hle/kernel/hle_ipc.h + hle/kernel/k_address_arbiter.cpp + hle/kernel/k_address_arbiter.h hle/kernel/k_affinity_mask.h + hle/kernel/k_condition_variable.cpp + hle/kernel/k_condition_variable.h hle/kernel/k_priority_queue.h hle/kernel/k_scheduler.cpp hle/kernel/k_scheduler.h hle/kernel/k_scheduler_lock.h hle/kernel/k_scoped_lock.h hle/kernel/k_scoped_scheduler_lock_and_sleep.h + hle/kernel/k_synchronization_object.cpp + hle/kernel/k_synchronization_object.h hle/kernel/kernel.cpp hle/kernel/kernel.h hle/kernel/memory/address_space_info.cpp @@ -183,8 +187,6 @@ add_library(core STATIC hle/kernel/memory/slab_heap.h hle/kernel/memory/system_control.cpp hle/kernel/memory/system_control.h - hle/kernel/mutex.cpp - hle/kernel/mutex.h hle/kernel/object.cpp hle/kernel/object.h hle/kernel/physical_core.cpp @@ -210,12 +212,10 @@ add_library(core STATIC hle/kernel/shared_memory.h hle/kernel/svc.cpp hle/kernel/svc.h + hle/kernel/svc_common.h + hle/kernel/svc_results.h hle/kernel/svc_types.h hle/kernel/svc_wrap.h - hle/kernel/synchronization_object.cpp - hle/kernel/synchronization_object.h - hle/kernel/synchronization.cpp - hle/kernel/synchronization.h hle/kernel/thread.cpp hle/kernel/thread.h hle/kernel/time_manager.cpp @@ -635,6 +635,8 @@ if (MSVC) /we4267 # 'context' : truncation from 'type1' to 'type2' /we4305 + # 'function' : not all control paths return a value + /we4715 ) else() target_compile_options(core PRIVATE diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 70098c526..9a0151736 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -26,9 +26,10 @@ using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CO /// Generic ARMv8 CPU interface class ARM_Interface : NonCopyable { public: - explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock) - : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{ - uses_wall_clock} {} + explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers_, + bool uses_wall_clock_) + : system{system_}, interrupt_handlers{interrupt_handlers_}, uses_wall_clock{ + uses_wall_clock_} {} virtual ~ARM_Interface() = default; struct ThreadContext32 { diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 8aaf11eee..6c4c8e9e4 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -71,15 +71,8 @@ public: } void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { - switch (exception) { - case Dynarmic::A32::Exception::UndefinedInstruction: - case Dynarmic::A32::Exception::UnpredictableInstruction: - break; - case Dynarmic::A32::Exception::Breakpoint: - break; - } LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", - static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); + exception, pc, MemoryReadCode(pc)); UNIMPLEMENTED(); } @@ -181,6 +174,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& if (Settings::values.cpuopt_unsafe_reduce_fp_error) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } } return std::make_unique<Dynarmic::A32::Jit>(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index d2e1dc724..4c5ebca22 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -212,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& if (Settings::values.cpuopt_unsafe_reduce_fp_error) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } } return std::make_shared<Dynarmic::A64::Jit>(config); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index e6c8461a5..874b5673a 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -49,6 +49,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) { Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); instance.on_thread_init(); instance.ThreadLoop(); + MicroProfileOnThreadExit(); } void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) { diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp index da15f764a..cebe2ce37 100644 --- a/src/core/crypto/key_manager.cpp +++ b/src/core/crypto/key_manager.cpp @@ -143,6 +143,7 @@ u64 GetSignatureTypeDataSize(SignatureType type) { return 0x3C; } UNREACHABLE(); + return 0; } u64 GetSignatureTypePaddingSize(SignatureType type) { @@ -157,6 +158,7 @@ u64 GetSignatureTypePaddingSize(SignatureType type) { return 0x40; } UNREACHABLE(); + return 0; } SignatureType Ticket::GetSignatureType() const { @@ -169,8 +171,7 @@ SignatureType Ticket::GetSignatureType() const { if (const auto* ticket = std::get_if<ECDSATicket>(&data)) { return ticket->sig_type; } - - UNREACHABLE(); + throw std::bad_variant_access{}; } TicketData& Ticket::GetData() { @@ -183,8 +184,7 @@ TicketData& Ticket::GetData() { if (auto* ticket = std::get_if<ECDSATicket>(&data)) { return ticket->data; } - - UNREACHABLE(); + throw std::bad_variant_access{}; } const TicketData& Ticket::GetData() const { @@ -197,8 +197,7 @@ const TicketData& Ticket::GetData() const { if (const auto* ticket = std::get_if<ECDSATicket>(&data)) { return ticket->data; } - - UNREACHABLE(); + throw std::bad_variant_access{}; } u64 Ticket::GetSize() const { diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp index adcf0732f..a65ec6798 100644 --- a/src/core/file_sys/nca_patch.cpp +++ b/src/core/file_sys/nca_patch.cpp @@ -51,8 +51,8 @@ std::pair<std::size_t, std::size_t> SearchBucketEntry(u64 offset, const BlockTyp low = mid + 1; } } - UNREACHABLE_MSG("Offset could not be found in BKTR block."); + return {0, 0}; } } // Anonymous namespace diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index da01002d5..431302f55 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -105,7 +105,8 @@ ContentRecordType GetCRTypeFromNCAType(NCAContentType type) { // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal. return ContentRecordType::HtmlDocument; default: - UNREACHABLE_MSG("Invalid NCAContentType={:02X}", static_cast<u8>(type)); + UNREACHABLE_MSG("Invalid NCAContentType={:02X}", type); + return ContentRecordType{}; } } diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h index 5b414b0f0..b08a1687a 100644 --- a/src/core/file_sys/registered_cache.h +++ b/src/core/file_sys/registered_cache.h @@ -67,18 +67,18 @@ public: virtual void Refresh() = 0; virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0; - virtual bool HasEntry(ContentProviderEntry entry) const; + bool HasEntry(ContentProviderEntry entry) const; virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0; virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0; - virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const; + VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const; virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0; - virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const; + VirtualFile GetEntryRaw(ContentProviderEntry entry) const; virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0; - virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const; + std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const; virtual std::vector<ContentProviderEntry> ListEntries() const; diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp deleted file mode 100644 index 20ffa7d47..000000000 --- a/src/core/hle/kernel/address_arbiter.cpp +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/arm/exclusive_monitor.h" -#include "core/core.h" -#include "core/hle/kernel/address_arbiter.h" -#include "core/hle/kernel/errors.h" -#include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/k_scheduler.h" -#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/kernel/time_manager.h" -#include "core/hle/result.h" -#include "core/memory.h" - -namespace Kernel { - -// Wake up num_to_wake (or all) threads in a vector. -void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, - s32 num_to_wake) { - // Only process up to 'target' threads, unless 'target' is <= 0, in which case process - // them all. - std::size_t last = waiting_threads.size(); - if (num_to_wake > 0) { - last = std::min(last, static_cast<std::size_t>(num_to_wake)); - } - - // Signal the waiting threads. - for (std::size_t i = 0; i < last; i++) { - waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS); - RemoveThread(waiting_threads[i]); - waiting_threads[i]->WaitForArbitration(false); - waiting_threads[i]->ResumeFromWait(); - } -} - -AddressArbiter::AddressArbiter(Core::System& system) : system{system} {} -AddressArbiter::~AddressArbiter() = default; - -ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value, - s32 num_to_wake) { - switch (type) { - case SignalType::Signal: - return SignalToAddressOnly(address, num_to_wake); - case SignalType::IncrementAndSignalIfEqual: - return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake); - case SignalType::ModifyByWaitingCountAndSignalIfEqual: - return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake); - default: - return ERR_INVALID_ENUM_VALUE; - } -} - -ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { - KScopedSchedulerLock lock(system.Kernel()); - const std::vector<std::shared_ptr<Thread>> waiting_threads = - GetThreadsWaitingOnAddress(address); - WakeThreads(waiting_threads, num_to_wake); - return RESULT_SUCCESS; -} - -ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, - s32 num_to_wake) { - KScopedSchedulerLock lock(system.Kernel()); - auto& memory = system.Memory(); - - // Ensure that we can write to the address. - if (!memory.IsValidVirtualAddress(address)) { - return ERR_INVALID_ADDRESS_STATE; - } - - const std::size_t current_core = system.CurrentCoreIndex(); - auto& monitor = system.Monitor(); - u32 current_value; - do { - current_value = monitor.ExclusiveRead32(current_core, address); - - if (current_value != static_cast<u32>(value)) { - return ERR_INVALID_STATE; - } - current_value++; - } while (!monitor.ExclusiveWrite32(current_core, address, current_value)); - - return SignalToAddressOnly(address, num_to_wake); -} - -ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, - s32 num_to_wake) { - KScopedSchedulerLock lock(system.Kernel()); - auto& memory = system.Memory(); - - // Ensure that we can write to the address. - if (!memory.IsValidVirtualAddress(address)) { - return ERR_INVALID_ADDRESS_STATE; - } - - // Get threads waiting on the address. - const std::vector<std::shared_ptr<Thread>> waiting_threads = - GetThreadsWaitingOnAddress(address); - - const std::size_t current_core = system.CurrentCoreIndex(); - auto& monitor = system.Monitor(); - s32 updated_value; - do { - updated_value = monitor.ExclusiveRead32(current_core, address); - - if (updated_value != value) { - return ERR_INVALID_STATE; - } - // Determine the modified value depending on the waiting count. - if (num_to_wake <= 0) { - if (waiting_threads.empty()) { - updated_value = value + 1; - } else { - updated_value = value - 1; - } - } else { - if (waiting_threads.empty()) { - updated_value = value + 1; - } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) { - updated_value = value - 1; - } else { - updated_value = value; - } - } - } while (!monitor.ExclusiveWrite32(current_core, address, updated_value)); - - WakeThreads(waiting_threads, num_to_wake); - return RESULT_SUCCESS; -} - -ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value, - s64 timeout_ns) { - switch (type) { - case ArbitrationType::WaitIfLessThan: - return WaitForAddressIfLessThan(address, value, timeout_ns, false); - case ArbitrationType::DecrementAndWaitIfLessThan: - return WaitForAddressIfLessThan(address, value, timeout_ns, true); - case ArbitrationType::WaitIfEqual: - return WaitForAddressIfEqual(address, value, timeout_ns); - default: - return ERR_INVALID_ENUM_VALUE; - } -} - -ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, - bool should_decrement) { - auto& memory = system.Memory(); - auto& kernel = system.Kernel(); - Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread(); - - Handle event_handle = InvalidHandle; - { - KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); - - if (current_thread->IsPendingTermination()) { - lock.CancelSleep(); - return ERR_THREAD_TERMINATING; - } - - // Ensure that we can read the address. - if (!memory.IsValidVirtualAddress(address)) { - lock.CancelSleep(); - return ERR_INVALID_ADDRESS_STATE; - } - - s32 current_value = static_cast<s32>(memory.Read32(address)); - if (current_value >= value) { - lock.CancelSleep(); - return ERR_INVALID_STATE; - } - - current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); - - s32 decrement_value; - - const std::size_t current_core = system.CurrentCoreIndex(); - auto& monitor = system.Monitor(); - do { - current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address)); - if (should_decrement) { - decrement_value = current_value - 1; - } else { - decrement_value = current_value; - } - } while ( - !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value))); - - // Short-circuit without rescheduling, if timeout is zero. - if (timeout == 0) { - lock.CancelSleep(); - return RESULT_TIMEOUT; - } - - current_thread->SetArbiterWaitAddress(address); - InsertThread(SharedFrom(current_thread)); - current_thread->SetStatus(ThreadStatus::WaitArb); - current_thread->WaitForArbitration(true); - } - - if (event_handle != InvalidHandle) { - auto& time_manager = kernel.TimeManager(); - time_manager.UnscheduleTimeEvent(event_handle); - } - - { - KScopedSchedulerLock lock(kernel); - if (current_thread->IsWaitingForArbitration()) { - RemoveThread(SharedFrom(current_thread)); - current_thread->WaitForArbitration(false); - } - } - - return current_thread->GetSignalingResult(); -} - -ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { - auto& memory = system.Memory(); - auto& kernel = system.Kernel(); - Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread(); - - Handle event_handle = InvalidHandle; - { - KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); - - if (current_thread->IsPendingTermination()) { - lock.CancelSleep(); - return ERR_THREAD_TERMINATING; - } - - // Ensure that we can read the address. - if (!memory.IsValidVirtualAddress(address)) { - lock.CancelSleep(); - return ERR_INVALID_ADDRESS_STATE; - } - - s32 current_value = static_cast<s32>(memory.Read32(address)); - if (current_value != value) { - lock.CancelSleep(); - return ERR_INVALID_STATE; - } - - // Short-circuit without rescheduling, if timeout is zero. - if (timeout == 0) { - lock.CancelSleep(); - return RESULT_TIMEOUT; - } - - current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); - current_thread->SetArbiterWaitAddress(address); - InsertThread(SharedFrom(current_thread)); - current_thread->SetStatus(ThreadStatus::WaitArb); - current_thread->WaitForArbitration(true); - } - - if (event_handle != InvalidHandle) { - auto& time_manager = kernel.TimeManager(); - time_manager.UnscheduleTimeEvent(event_handle); - } - - { - KScopedSchedulerLock lock(kernel); - if (current_thread->IsWaitingForArbitration()) { - RemoveThread(SharedFrom(current_thread)); - current_thread->WaitForArbitration(false); - } - } - - return current_thread->GetSignalingResult(); -} - -void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { - const VAddr arb_addr = thread->GetArbiterWaitAddress(); - std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - - const auto iter = - std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { - return entry->GetPriority() >= thread->GetPriority(); - }); - - if (iter == thread_list.cend()) { - thread_list.push_back(std::move(thread)); - } else { - thread_list.insert(iter, std::move(thread)); - } -} - -void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { - const VAddr arb_addr = thread->GetArbiterWaitAddress(); - std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - - const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), - [&thread](const auto& entry) { return thread == entry; }); - - if (iter != thread_list.cend()) { - thread_list.erase(iter); - } -} - -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( - VAddr address) const { - const auto iter = arb_threads.find(address); - if (iter == arb_threads.cend()) { - return {}; - } - - const std::list<std::shared_ptr<Thread>>& thread_list = iter->second; - return {thread_list.cbegin(), thread_list.cend()}; -} -} // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h deleted file mode 100644 index b91edc67d..000000000 --- a/src/core/hle/kernel/address_arbiter.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <list> -#include <memory> -#include <unordered_map> -#include <vector> - -#include "common/common_types.h" - -union ResultCode; - -namespace Core { -class System; -} - -namespace Kernel { - -class Thread; - -class AddressArbiter { -public: - enum class ArbitrationType { - WaitIfLessThan = 0, - DecrementAndWaitIfLessThan = 1, - WaitIfEqual = 2, - }; - - enum class SignalType { - Signal = 0, - IncrementAndSignalIfEqual = 1, - ModifyByWaitingCountAndSignalIfEqual = 2, - }; - - explicit AddressArbiter(Core::System& system); - ~AddressArbiter(); - - AddressArbiter(const AddressArbiter&) = delete; - AddressArbiter& operator=(const AddressArbiter&) = delete; - - AddressArbiter(AddressArbiter&&) = default; - AddressArbiter& operator=(AddressArbiter&&) = delete; - - /// Signals an address being waited on with a particular signaling type. - ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake); - - /// Waits on an address with a particular arbitration type. - ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns); - -private: - /// Signals an address being waited on. - ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake); - - /// Signals an address being waited on and increments its value if equal to the value argument. - ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); - - /// Signals an address being waited on and modifies its value based on waiting thread count if - /// equal to the value argument. - ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, - s32 num_to_wake); - - /// Waits on an address if the value passed is less than the argument value, - /// optionally decrementing. - ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, - bool should_decrement); - - /// Waits on an address if the value passed is equal to the argument value. - ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); - - /// Wake up num_to_wake (or all) threads in a vector. - void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake); - - /// Insert a thread into the address arbiter container - void InsertThread(std::shared_ptr<Thread> thread); - - /// Removes a thread from the address arbiter container - void RemoveThread(std::shared_ptr<Thread> thread); - - // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; - - /// List of threads waiting for a address arbiter - std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; - - Core::System& system; -}; - -} // namespace Kernel diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp index 8aff2227a..f8f005f15 100644 --- a/src/core/hle/kernel/client_port.cpp +++ b/src/core/hle/kernel/client_port.cpp @@ -33,9 +33,6 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() { server_port->AppendPendingSession(std::move(server)); } - // Wake the threads waiting on the ServerPort - server_port->Signal(); - return MakeResult(std::move(client)); } diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp index be9eba519..e8e52900d 100644 --- a/src/core/hle/kernel/client_session.cpp +++ b/src/core/hle/kernel/client_session.cpp @@ -12,7 +12,7 @@ namespace Kernel { -ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {} +ClientSession::ClientSession(KernelCore& kernel) : KSynchronizationObject{kernel} {} ClientSession::~ClientSession() { // This destructor will be called automatically when the last ClientSession handle is closed by @@ -22,15 +22,6 @@ ClientSession::~ClientSession() { } } -bool ClientSession::ShouldWait(const Thread* thread) const { - UNIMPLEMENTED(); - return {}; -} - -void ClientSession::Acquire(Thread* thread) { - UNIMPLEMENTED(); -} - bool ClientSession::IsSignaled() const { UNIMPLEMENTED(); return true; diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h index e5e0690c2..d5c9ebee8 100644 --- a/src/core/hle/kernel/client_session.h +++ b/src/core/hle/kernel/client_session.h @@ -7,7 +7,7 @@ #include <memory> #include <string> -#include "core/hle/kernel/synchronization_object.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/result.h" union ResultCode; @@ -26,7 +26,7 @@ class KernelCore; class Session; class Thread; -class ClientSession final : public SynchronizationObject { +class ClientSession final : public KSynchronizationObject { public: explicit ClientSession(KernelCore& kernel); ~ClientSession() override; @@ -49,10 +49,6 @@ public: ResultCode SendSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory, Core::Timing::CoreTiming& core_timing); - bool ShouldWait(const Thread* thread) const override; - - void Acquire(Thread* thread) override; - bool IsSignaled() const override; private: diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index d4e5d88cf..7d32a39f0 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -13,12 +13,14 @@ namespace Kernel { constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59}; +constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59}; constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103}; constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104}; constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; +constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106}; constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110}; constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113}; @@ -28,6 +30,7 @@ constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115}; constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116}; constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117}; constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118}; +constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118}; constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119}; constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120}; constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121}; diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp new file mode 100644 index 000000000..d9e702f13 --- /dev/null +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -0,0 +1,367 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/arm/exclusive_monitor.h" +#include "core/core.h" +#include "core/hle/kernel/k_address_arbiter.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/svc_results.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" +#include "core/memory.h" + +namespace Kernel { + +KAddressArbiter::KAddressArbiter(Core::System& system_) + : system{system_}, kernel{system.Kernel()} {} +KAddressArbiter::~KAddressArbiter() = default; + +namespace { + +bool ReadFromUser(Core::System& system, s32* out, VAddr address) { + *out = system.Memory().Read32(address); + return true; +} + +bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) { + auto& monitor = system.Monitor(); + const auto current_core = system.CurrentCoreIndex(); + + // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. + // TODO(bunnei): We should call CanAccessAtomic(..) here. + + // Load the value from the address. + const s32 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address)); + + // Compare it to the desired one. + if (current_value < value) { + // If less than, we want to try to decrement. + const s32 decrement_value = current_value - 1; + + // Decrement and try to store. + if (!monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value))) { + // If we failed to store, try again. + DecrementIfLessThan(system, out, address, value); + } + } else { + // Otherwise, clear our exclusive hold and finish + monitor.ClearExclusive(); + } + + // We're done. + *out = current_value; + return true; +} + +bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) { + auto& monitor = system.Monitor(); + const auto current_core = system.CurrentCoreIndex(); + + // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. + // TODO(bunnei): We should call CanAccessAtomic(..) here. + + // Load the value from the address. + const s32 current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address)); + + // Compare it to the desired one. + if (current_value == value) { + // If equal, we want to try to write the new value. + + // Try to store. + if (!monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(new_value))) { + // If we failed to store, try again. + UpdateIfEqual(system, out, address, value, new_value); + } + } else { + // Otherwise, clear our exclusive hold and finish. + monitor.ClearExclusive(); + } + + // We're done. + *out = current_value; + return true; +} + +} // namespace + +ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) { + // Perform signaling. + s32 num_waiters{}; + { + KScopedSchedulerLock sl(kernel); + + auto it = thread_tree.nfind_light({addr, -1}); + while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && + (it->GetAddressArbiterKey() == addr)) { + Thread* target_thread = std::addressof(*it); + target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + + ASSERT(target_thread->IsWaitingForAddressArbiter()); + target_thread->Wakeup(); + + it = thread_tree.erase(it); + target_thread->ClearAddressArbiter(); + ++num_waiters; + } + } + return RESULT_SUCCESS; +} + +ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 count) { + // Perform signaling. + s32 num_waiters{}; + { + KScopedSchedulerLock sl(kernel); + + // Check the userspace value. + s32 user_value{}; + R_UNLESS(UpdateIfEqual(system, std::addressof(user_value), addr, value, value + 1), + Svc::ResultInvalidCurrentMemory); + R_UNLESS(user_value == value, Svc::ResultInvalidState); + + auto it = thread_tree.nfind_light({addr, -1}); + while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && + (it->GetAddressArbiterKey() == addr)) { + Thread* target_thread = std::addressof(*it); + target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + + ASSERT(target_thread->IsWaitingForAddressArbiter()); + target_thread->Wakeup(); + + it = thread_tree.erase(it); + target_thread->ClearAddressArbiter(); + ++num_waiters; + } + } + return RESULT_SUCCESS; +} + +ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 value, s32 count) { + // Perform signaling. + s32 num_waiters{}; + { + KScopedSchedulerLock sl(kernel); + + auto it = thread_tree.nfind_light({addr, -1}); + // Determine the updated value. + s32 new_value{}; + if (/*GetTargetFirmware() >= TargetFirmware_7_0_0*/ true) { + if (count <= 0) { + if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) { + new_value = value - 2; + } else { + new_value = value + 1; + } + } else { + if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) { + auto tmp_it = it; + s32 tmp_num_waiters{}; + while ((++tmp_it != thread_tree.end()) && + (tmp_it->GetAddressArbiterKey() == addr)) { + if ((tmp_num_waiters++) >= count) { + break; + } + } + + if (tmp_num_waiters < count) { + new_value = value - 1; + } else { + new_value = value; + } + } else { + new_value = value + 1; + } + } + } else { + if (count <= 0) { + if ((it != thread_tree.end()) && (it->GetAddressArbiterKey() == addr)) { + new_value = value - 1; + } else { + new_value = value + 1; + } + } else { + auto tmp_it = it; + s32 tmp_num_waiters{}; + while ((tmp_it != thread_tree.end()) && (tmp_it->GetAddressArbiterKey() == addr) && + (tmp_num_waiters < count + 1)) { + ++tmp_num_waiters; + ++tmp_it; + } + + if (tmp_num_waiters == 0) { + new_value = value + 1; + } else if (tmp_num_waiters <= count) { + new_value = value - 1; + } else { + new_value = value; + } + } + } + + // Check the userspace value. + s32 user_value{}; + bool succeeded{}; + if (value != new_value) { + succeeded = UpdateIfEqual(system, std::addressof(user_value), addr, value, new_value); + } else { + succeeded = ReadFromUser(system, std::addressof(user_value), addr); + } + + R_UNLESS(succeeded, Svc::ResultInvalidCurrentMemory); + R_UNLESS(user_value == value, Svc::ResultInvalidState); + + while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && + (it->GetAddressArbiterKey() == addr)) { + Thread* target_thread = std::addressof(*it); + target_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + + ASSERT(target_thread->IsWaitingForAddressArbiter()); + target_thread->Wakeup(); + + it = thread_tree.erase(it); + target_thread->ClearAddressArbiter(); + ++num_waiters; + } + } + return RESULT_SUCCESS; +} + +ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement, s64 timeout) { + // Prepare to wait. + Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread(); + Handle timer = InvalidHandle; + + { + KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout); + + // Check that the thread isn't terminating. + if (cur_thread->IsTerminationRequested()) { + slp.CancelSleep(); + return Svc::ResultTerminationRequested; + } + + // Set the synced object. + cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + + // Read the value from userspace. + s32 user_value{}; + bool succeeded{}; + if (decrement) { + succeeded = DecrementIfLessThan(system, std::addressof(user_value), addr, value); + } else { + succeeded = ReadFromUser(system, std::addressof(user_value), addr); + } + + if (!succeeded) { + slp.CancelSleep(); + return Svc::ResultInvalidCurrentMemory; + } + + // Check that the value is less than the specified one. + if (user_value >= value) { + slp.CancelSleep(); + return Svc::ResultInvalidState; + } + + // Check that the timeout is non-zero. + if (timeout == 0) { + slp.CancelSleep(); + return Svc::ResultTimedOut; + } + + // Set the arbiter. + cur_thread->SetAddressArbiter(std::addressof(thread_tree), addr); + thread_tree.insert(*cur_thread); + cur_thread->SetState(ThreadState::Waiting); + cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Arbitration); + } + + // Cancel the timer wait. + if (timer != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(timer); + } + + // Remove from the address arbiter. + { + KScopedSchedulerLock sl(kernel); + + if (cur_thread->IsWaitingForAddressArbiter()) { + thread_tree.erase(thread_tree.iterator_to(*cur_thread)); + cur_thread->ClearAddressArbiter(); + } + } + + // Get the result. + KSynchronizationObject* dummy{}; + return cur_thread->GetWaitResult(std::addressof(dummy)); +} + +ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) { + // Prepare to wait. + Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread(); + Handle timer = InvalidHandle; + + { + KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout); + + // Check that the thread isn't terminating. + if (cur_thread->IsTerminationRequested()) { + slp.CancelSleep(); + return Svc::ResultTerminationRequested; + } + + // Set the synced object. + cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + + // Read the value from userspace. + s32 user_value{}; + if (!ReadFromUser(system, std::addressof(user_value), addr)) { + slp.CancelSleep(); + return Svc::ResultInvalidCurrentMemory; + } + + // Check that the value is equal. + if (value != user_value) { + slp.CancelSleep(); + return Svc::ResultInvalidState; + } + + // Check that the timeout is non-zero. + if (timeout == 0) { + slp.CancelSleep(); + return Svc::ResultTimedOut; + } + + // Set the arbiter. + cur_thread->SetAddressArbiter(std::addressof(thread_tree), addr); + thread_tree.insert(*cur_thread); + cur_thread->SetState(ThreadState::Waiting); + cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Arbitration); + } + + // Cancel the timer wait. + if (timer != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(timer); + } + + // Remove from the address arbiter. + { + KScopedSchedulerLock sl(kernel); + + if (cur_thread->IsWaitingForAddressArbiter()) { + thread_tree.erase(thread_tree.iterator_to(*cur_thread)); + cur_thread->ClearAddressArbiter(); + } + } + + // Get the result. + KSynchronizationObject* dummy{}; + return cur_thread->GetWaitResult(std::addressof(dummy)); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_address_arbiter.h b/src/core/hle/kernel/k_address_arbiter.h new file mode 100644 index 000000000..8d379b524 --- /dev/null +++ b/src/core/hle/kernel/k_address_arbiter.h @@ -0,0 +1,70 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/hle/kernel/k_condition_variable.h" +#include "core/hle/kernel/svc_types.h" + +union ResultCode; + +namespace Core { +class System; +} + +namespace Kernel { + +class KernelCore; + +class KAddressArbiter { +public: + using ThreadTree = KConditionVariable::ThreadTree; + + explicit KAddressArbiter(Core::System& system_); + ~KAddressArbiter(); + + [[nodiscard]] ResultCode SignalToAddress(VAddr addr, Svc::SignalType type, s32 value, + s32 count) { + switch (type) { + case Svc::SignalType::Signal: + return Signal(addr, count); + case Svc::SignalType::SignalAndIncrementIfEqual: + return SignalAndIncrementIfEqual(addr, value, count); + case Svc::SignalType::SignalAndModifyByWaitingCountIfEqual: + return SignalAndModifyByWaitingCountIfEqual(addr, value, count); + } + UNREACHABLE(); + return RESULT_UNKNOWN; + } + + [[nodiscard]] ResultCode WaitForAddress(VAddr addr, Svc::ArbitrationType type, s32 value, + s64 timeout) { + switch (type) { + case Svc::ArbitrationType::WaitIfLessThan: + return WaitIfLessThan(addr, value, false, timeout); + case Svc::ArbitrationType::DecrementAndWaitIfLessThan: + return WaitIfLessThan(addr, value, true, timeout); + case Svc::ArbitrationType::WaitIfEqual: + return WaitIfEqual(addr, value, timeout); + } + UNREACHABLE(); + return RESULT_UNKNOWN; + } + +private: + [[nodiscard]] ResultCode Signal(VAddr addr, s32 count); + [[nodiscard]] ResultCode SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 count); + [[nodiscard]] ResultCode SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 value, s32 count); + [[nodiscard]] ResultCode WaitIfLessThan(VAddr addr, s32 value, bool decrement, s64 timeout); + [[nodiscard]] ResultCode WaitIfEqual(VAddr addr, s32 value, s64 timeout); + + ThreadTree thread_tree; + + Core::System& system; + KernelCore& kernel; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp new file mode 100644 index 000000000..49a068310 --- /dev/null +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -0,0 +1,349 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <vector> + +#include "core/arm/exclusive_monitor.h" +#include "core/core.h" +#include "core/hle/kernel/k_condition_variable.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" +#include "core/hle/kernel/k_synchronization_object.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/svc_common.h" +#include "core/hle/kernel/svc_results.h" +#include "core/hle/kernel/thread.h" +#include "core/memory.h" + +namespace Kernel { + +namespace { + +bool ReadFromUser(Core::System& system, u32* out, VAddr address) { + *out = system.Memory().Read32(address); + return true; +} + +bool WriteToUser(Core::System& system, VAddr address, const u32* p) { + system.Memory().Write32(address, *p); + return true; +} + +bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero, + u32 new_orr_mask) { + auto& monitor = system.Monitor(); + const auto current_core = system.CurrentCoreIndex(); + + // Load the value from the address. + const auto expected = monitor.ExclusiveRead32(current_core, address); + + // Orr in the new mask. + u32 value = expected | new_orr_mask; + + // If the value is zero, use the if_zero value, otherwise use the newly orr'd value. + if (!expected) { + value = if_zero; + } + + // Try to store. + if (!monitor.ExclusiveWrite32(current_core, address, value)) { + // If we failed to store, try again. + return UpdateLockAtomic(system, out, address, if_zero, new_orr_mask); + } + + // We're done. + *out = expected; + return true; +} + +} // namespace + +KConditionVariable::KConditionVariable(Core::System& system_) + : system{system_}, kernel{system.Kernel()} {} + +KConditionVariable::~KConditionVariable() = default; + +ResultCode KConditionVariable::SignalToAddress(VAddr addr) { + Thread* owner_thread = kernel.CurrentScheduler()->GetCurrentThread(); + + // Signal the address. + { + KScopedSchedulerLock sl(kernel); + + // Remove waiter thread. + s32 num_waiters{}; + Thread* next_owner_thread = + owner_thread->RemoveWaiterByKey(std::addressof(num_waiters), addr); + + // Determine the next tag. + u32 next_value{}; + if (next_owner_thread) { + next_value = next_owner_thread->GetAddressKeyValue(); + if (num_waiters > 1) { + next_value |= Svc::HandleWaitMask; + } + + next_owner_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + next_owner_thread->Wakeup(); + } + + // Write the value to userspace. + if (!WriteToUser(system, addr, std::addressof(next_value))) { + if (next_owner_thread) { + next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory); + } + + return Svc::ResultInvalidCurrentMemory; + } + } + + return RESULT_SUCCESS; +} + +ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 value) { + Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread(); + + // Wait for the address. + { + std::shared_ptr<Thread> owner_thread; + ASSERT(!owner_thread); + { + KScopedSchedulerLock sl(kernel); + cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + + // Check if the thread should terminate. + R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested); + + { + // Read the tag from userspace. + u32 test_tag{}; + R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr), + Svc::ResultInvalidCurrentMemory); + + // If the tag isn't the handle (with wait mask), we're done. + R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS); + + // Get the lock owner thread. + owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<Thread>(handle); + R_UNLESS(owner_thread, Svc::ResultInvalidHandle); + + // Update the lock. + cur_thread->SetAddressKey(addr, value); + owner_thread->AddWaiter(cur_thread); + cur_thread->SetState(ThreadState::Waiting); + cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::ConditionVar); + cur_thread->SetMutexWaitAddressForDebugging(addr); + } + } + ASSERT(owner_thread); + } + + // Remove the thread as a waiter from the lock owner. + { + KScopedSchedulerLock sl(kernel); + Thread* owner_thread = cur_thread->GetLockOwner(); + if (owner_thread != nullptr) { + owner_thread->RemoveWaiter(cur_thread); + } + } + + // Get the wait result. + KSynchronizationObject* dummy{}; + return cur_thread->GetWaitResult(std::addressof(dummy)); +} + +Thread* KConditionVariable::SignalImpl(Thread* thread) { + // Check pre-conditions. + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + // Update the tag. + VAddr address = thread->GetAddressKey(); + u32 own_tag = thread->GetAddressKeyValue(); + + u32 prev_tag{}; + bool can_access{}; + { + // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. + // TODO(bunnei): We should call CanAccessAtomic(..) here. + can_access = true; + if (can_access) { + UpdateLockAtomic(system, std::addressof(prev_tag), address, own_tag, + Svc::HandleWaitMask); + } + } + + Thread* thread_to_close = nullptr; + if (can_access) { + if (prev_tag == InvalidHandle) { + // If nobody held the lock previously, we're all good. + thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + thread->Wakeup(); + } else { + // Get the previous owner. + auto owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<Thread>( + prev_tag & ~Svc::HandleWaitMask); + + if (owner_thread) { + // Add the thread as a waiter on the owner. + owner_thread->AddWaiter(thread); + thread_to_close = owner_thread.get(); + } else { + // The lock was tagged with a thread that doesn't exist. + thread->SetSyncedObject(nullptr, Svc::ResultInvalidState); + thread->Wakeup(); + } + } + } else { + // If the address wasn't accessible, note so. + thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory); + thread->Wakeup(); + } + + return thread_to_close; +} + +void KConditionVariable::Signal(u64 cv_key, s32 count) { + // Prepare for signaling. + constexpr int MaxThreads = 16; + + // TODO(bunnei): This should just be Thread once we implement KAutoObject instead of using + // std::shared_ptr. + std::vector<std::shared_ptr<Thread>> thread_list; + std::array<Thread*, MaxThreads> thread_array; + s32 num_to_close{}; + + // Perform signaling. + s32 num_waiters{}; + { + KScopedSchedulerLock sl(kernel); + + auto it = thread_tree.nfind_light({cv_key, -1}); + while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && + (it->GetConditionVariableKey() == cv_key)) { + Thread* target_thread = std::addressof(*it); + + if (Thread* thread = SignalImpl(target_thread); thread != nullptr) { + if (num_to_close < MaxThreads) { + thread_array[num_to_close++] = thread; + } else { + thread_list.push_back(SharedFrom(thread)); + } + } + + it = thread_tree.erase(it); + target_thread->ClearConditionVariable(); + ++num_waiters; + } + + // If we have no waiters, clear the has waiter flag. + if (it == thread_tree.end() || it->GetConditionVariableKey() != cv_key) { + const u32 has_waiter_flag{}; + WriteToUser(system, cv_key, std::addressof(has_waiter_flag)); + } + } + + // Close threads in the array. + for (auto i = 0; i < num_to_close; ++i) { + thread_array[i]->Close(); + } + + // Close threads in the list. + for (auto it = thread_list.begin(); it != thread_list.end(); it = thread_list.erase(it)) { + (*it)->Close(); + } +} + +ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout) { + // Prepare to wait. + Thread* cur_thread = kernel.CurrentScheduler()->GetCurrentThread(); + Handle timer = InvalidHandle; + + { + KScopedSchedulerLockAndSleep slp(kernel, timer, cur_thread, timeout); + + // Set the synced object. + cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + + // Check that the thread isn't terminating. + if (cur_thread->IsTerminationRequested()) { + slp.CancelSleep(); + return Svc::ResultTerminationRequested; + } + + // Update the value and process for the next owner. + { + // Remove waiter thread. + s32 num_waiters{}; + Thread* next_owner_thread = + cur_thread->RemoveWaiterByKey(std::addressof(num_waiters), addr); + + // Update for the next owner thread. + u32 next_value{}; + if (next_owner_thread != nullptr) { + // Get the next tag value. + next_value = next_owner_thread->GetAddressKeyValue(); + if (num_waiters > 1) { + next_value |= Svc::HandleWaitMask; + } + + // Wake up the next owner. + next_owner_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); + next_owner_thread->Wakeup(); + } + + // Write to the cv key. + { + const u32 has_waiter_flag = 1; + WriteToUser(system, key, std::addressof(has_waiter_flag)); + // TODO(bunnei): We should call DataMemoryBarrier(..) here. + } + + // Write the value to userspace. + if (!WriteToUser(system, addr, std::addressof(next_value))) { + slp.CancelSleep(); + return Svc::ResultInvalidCurrentMemory; + } + } + + // Update condition variable tracking. + { + cur_thread->SetConditionVariable(std::addressof(thread_tree), addr, key, value); + thread_tree.insert(*cur_thread); + } + + // If the timeout is non-zero, set the thread as waiting. + if (timeout != 0) { + cur_thread->SetState(ThreadState::Waiting); + cur_thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::ConditionVar); + cur_thread->SetMutexWaitAddressForDebugging(addr); + } + } + + // Cancel the timer wait. + if (timer != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(timer); + } + + // Remove from the condition variable. + { + KScopedSchedulerLock sl(kernel); + + if (Thread* owner = cur_thread->GetLockOwner(); owner != nullptr) { + owner->RemoveWaiter(cur_thread); + } + + if (cur_thread->IsWaitingForConditionVariable()) { + thread_tree.erase(thread_tree.iterator_to(*cur_thread)); + cur_thread->ClearConditionVariable(); + } + } + + // Get the result. + KSynchronizationObject* dummy{}; + return cur_thread->GetWaitResult(std::addressof(dummy)); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_condition_variable.h b/src/core/hle/kernel/k_condition_variable.h new file mode 100644 index 000000000..98ed5b323 --- /dev/null +++ b/src/core/hle/kernel/k_condition_variable.h @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/assert.h" +#include "common/common_types.h" + +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/result.h" + +namespace Core { +class System; +} + +namespace Kernel { + +class KConditionVariable { +public: + using ThreadTree = typename Thread::ConditionVariableThreadTreeType; + + explicit KConditionVariable(Core::System& system_); + ~KConditionVariable(); + + // Arbitration + [[nodiscard]] ResultCode SignalToAddress(VAddr addr); + [[nodiscard]] ResultCode WaitForAddress(Handle handle, VAddr addr, u32 value); + + // Condition variable + void Signal(u64 cv_key, s32 count); + [[nodiscard]] ResultCode Wait(VAddr addr, u64 key, u32 value, s64 timeout); + +private: + [[nodiscard]] Thread* SignalImpl(Thread* thread); + + ThreadTree thread_tree; + + Core::System& system; + KernelCore& kernel; +}; + +inline void BeforeUpdatePriority(const KernelCore& kernel, KConditionVariable::ThreadTree* tree, + Thread* thread) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + tree->erase(tree->iterator_to(*thread)); +} + +inline void AfterUpdatePriority(const KernelCore& kernel, KConditionVariable::ThreadTree* tree, + Thread* thread) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + tree->insert(*thread); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index c5fd82a6b..42f0ea483 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -180,22 +180,22 @@ u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) { return cores_needing_scheduling; } -void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state) { +void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, ThreadState old_state) { ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // Check if the state has changed, because if it hasn't there's nothing to do. - const auto cur_state = thread->scheduling_state; + const auto cur_state = thread->GetRawState(); if (cur_state == old_state) { return; } // Update the priority queues. - if (old_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + if (old_state == ThreadState::Runnable) { // If we were previously runnable, then we're not runnable now, and we should remove. GetPriorityQueue(kernel).Remove(thread); IncrementScheduledCount(thread); SetSchedulerUpdateNeeded(kernel); - } else if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + } else if (cur_state == ThreadState::Runnable) { // If we're now runnable, then we weren't previously, and we should add. GetPriorityQueue(kernel).PushBack(thread); IncrementScheduledCount(thread); @@ -203,13 +203,11 @@ void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 ol } } -void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread, - u32 old_priority) { - +void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, s32 old_priority) { ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // If the thread is runnable, we want to change its priority in the queue. - if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + if (thread->GetRawState() == ThreadState::Runnable) { GetPriorityQueue(kernel).ChangePriority( old_priority, thread == kernel.CurrentScheduler()->GetCurrentThread(), thread); IncrementScheduledCount(thread); @@ -222,7 +220,7 @@ void KScheduler::OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread, ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // If the thread is runnable, we want to change its affinity in the queue. - if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + if (thread->GetRawState() == ThreadState::Runnable) { GetPriorityQueue(kernel).ChangeAffinityMask(old_core, old_affinity, thread); IncrementScheduledCount(thread); SetSchedulerUpdateNeeded(kernel); @@ -292,7 +290,7 @@ void KScheduler::RotateScheduledQueue(s32 core_id, s32 priority) { // If the best thread we can choose has a priority the same or worse than ours, try to // migrate a higher priority thread. - if (best_thread != nullptr && best_thread->GetPriority() >= static_cast<u32>(priority)) { + if (best_thread != nullptr && best_thread->GetPriority() >= priority) { Thread* suggested = priority_queue.GetSuggestedFront(core_id); while (suggested != nullptr) { // If the suggestion's priority is the same as ours, don't bother. @@ -395,8 +393,8 @@ void KScheduler::YieldWithoutCoreMigration() { { KScopedSchedulerLock lock(kernel); - const auto cur_state = cur_thread.scheduling_state; - if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + const auto cur_state = cur_thread.GetRawState(); + if (cur_state == ThreadState::Runnable) { // Put the current thread at the back of the queue. Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread)); IncrementScheduledCount(std::addressof(cur_thread)); @@ -436,8 +434,8 @@ void KScheduler::YieldWithCoreMigration() { { KScopedSchedulerLock lock(kernel); - const auto cur_state = cur_thread.scheduling_state; - if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + const auto cur_state = cur_thread.GetRawState(); + if (cur_state == ThreadState::Runnable) { // Get the current active core. const s32 core_id = cur_thread.GetActiveCore(); @@ -526,8 +524,8 @@ void KScheduler::YieldToAnyThread() { { KScopedSchedulerLock lock(kernel); - const auto cur_state = cur_thread.scheduling_state; - if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + const auto cur_state = cur_thread.GetRawState(); + if (cur_state == ThreadState::Runnable) { // Get the current active core. const s32 core_id = cur_thread.GetActiveCore(); @@ -645,8 +643,7 @@ void KScheduler::Unload(Thread* thread) { void KScheduler::Reload(Thread* thread) { if (thread) { - ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, - "Thread must be runnable."); + ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); // Cancel any outstanding wakeup events for this thread thread->SetIsRunning(true); @@ -725,7 +722,7 @@ void KScheduler::SwitchToCurrent() { do { if (current_thread != nullptr && !current_thread->IsHLEThread()) { current_thread->context_guard.lock(); - if (!current_thread->IsRunnable()) { + if (current_thread->GetRawState() != ThreadState::Runnable) { current_thread->context_guard.unlock(); break; } @@ -772,7 +769,7 @@ void KScheduler::Initialize() { { KScopedSchedulerLock lock{system.Kernel()}; - idle_thread->SetStatus(ThreadStatus::Ready); + idle_thread->SetState(ThreadState::Runnable); } } diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h index e84abc84c..783665123 100644 --- a/src/core/hle/kernel/k_scheduler.h +++ b/src/core/hle/kernel/k_scheduler.h @@ -100,11 +100,10 @@ public: void YieldToAnyThread(); /// Notify the scheduler a thread's status has changed. - static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state); + static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, ThreadState old_state); /// Notify the scheduler a thread's priority has changed. - static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread, - u32 old_priority); + static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, s32 old_priority); /// Notify the scheduler a thread's core and/or affinity mask has changed. static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread, diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h index 2f1c1f691..9b40bd22c 100644 --- a/src/core/hle/kernel/k_scheduler_lock.h +++ b/src/core/hle/kernel/k_scheduler_lock.h @@ -19,7 +19,7 @@ class KernelCore; template <typename SchedulerType> class KAbstractSchedulerLock { public: - explicit KAbstractSchedulerLock(KernelCore& kernel) : kernel{kernel} {} + explicit KAbstractSchedulerLock(KernelCore& kernel_) : kernel{kernel_} {} bool IsLockedByCurrentThread() const { return this->owner_thread == kernel.GetCurrentEmuThreadID(); diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp new file mode 100644 index 000000000..1c508cb55 --- /dev/null +++ b/src/core/hle/kernel/k_synchronization_object.cpp @@ -0,0 +1,172 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" +#include "core/hle/kernel/k_synchronization_object.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/svc_results.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index, + KSynchronizationObject** objects, const s32 num_objects, + s64 timeout) { + // Allocate space on stack for thread nodes. + std::vector<ThreadListNode> thread_nodes(num_objects); + + // Prepare for wait. + Thread* thread = kernel.CurrentScheduler()->GetCurrentThread(); + Handle timer = InvalidHandle; + + { + // Setup the scheduling lock and sleep. + KScopedSchedulerLockAndSleep slp(kernel, timer, thread, timeout); + + // Check if any of the objects are already signaled. + for (auto i = 0; i < num_objects; ++i) { + ASSERT(objects[i] != nullptr); + + if (objects[i]->IsSignaled()) { + *out_index = i; + slp.CancelSleep(); + return RESULT_SUCCESS; + } + } + + // Check if the timeout is zero. + if (timeout == 0) { + slp.CancelSleep(); + return Svc::ResultTimedOut; + } + + // Check if the thread should terminate. + if (thread->IsTerminationRequested()) { + slp.CancelSleep(); + return Svc::ResultTerminationRequested; + } + + // Check if waiting was canceled. + if (thread->IsWaitCancelled()) { + slp.CancelSleep(); + thread->ClearWaitCancelled(); + return Svc::ResultCancelled; + } + + // Add the waiters. + for (auto i = 0; i < num_objects; ++i) { + thread_nodes[i].thread = thread; + thread_nodes[i].next = nullptr; + + if (objects[i]->thread_list_tail == nullptr) { + objects[i]->thread_list_head = std::addressof(thread_nodes[i]); + } else { + objects[i]->thread_list_tail->next = std::addressof(thread_nodes[i]); + } + + objects[i]->thread_list_tail = std::addressof(thread_nodes[i]); + } + + // For debugging only + thread->SetWaitObjectsForDebugging({objects, static_cast<std::size_t>(num_objects)}); + + // Mark the thread as waiting. + thread->SetCancellable(); + thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + thread->SetState(ThreadState::Waiting); + thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization); + } + + // The lock/sleep is done, so we should be able to get our result. + + // Thread is no longer cancellable. + thread->ClearCancellable(); + + // For debugging only + thread->SetWaitObjectsForDebugging({}); + + // Cancel the timer as needed. + if (timer != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(timer); + } + + // Get the wait result. + ResultCode wait_result{RESULT_SUCCESS}; + s32 sync_index = -1; + { + KScopedSchedulerLock lock(kernel); + KSynchronizationObject* synced_obj; + wait_result = thread->GetWaitResult(std::addressof(synced_obj)); + + for (auto i = 0; i < num_objects; ++i) { + // Unlink the object from the list. + ThreadListNode* prev_ptr = + reinterpret_cast<ThreadListNode*>(std::addressof(objects[i]->thread_list_head)); + ThreadListNode* prev_val = nullptr; + ThreadListNode *prev, *tail_prev; + + do { + prev = prev_ptr; + prev_ptr = prev_ptr->next; + tail_prev = prev_val; + prev_val = prev_ptr; + } while (prev_ptr != std::addressof(thread_nodes[i])); + + if (objects[i]->thread_list_tail == std::addressof(thread_nodes[i])) { + objects[i]->thread_list_tail = tail_prev; + } + + prev->next = thread_nodes[i].next; + + if (objects[i] == synced_obj) { + sync_index = i; + } + } + } + + // Set output. + *out_index = sync_index; + return wait_result; +} + +KSynchronizationObject::KSynchronizationObject(KernelCore& kernel) : Object{kernel} {} + +KSynchronizationObject ::~KSynchronizationObject() = default; + +void KSynchronizationObject::NotifyAvailable(ResultCode result) { + KScopedSchedulerLock lock(kernel); + + // If we're not signaled, we've nothing to notify. + if (!this->IsSignaled()) { + return; + } + + // Iterate over each thread. + for (auto* cur_node = thread_list_head; cur_node != nullptr; cur_node = cur_node->next) { + Thread* thread = cur_node->thread; + if (thread->GetState() == ThreadState::Waiting) { + thread->SetSyncedObject(this, result); + thread->SetState(ThreadState::Runnable); + } + } +} + +std::vector<Thread*> KSynchronizationObject::GetWaitingThreadsForDebugging() const { + std::vector<Thread*> threads; + + // If debugging, dump the list of waiters. + { + KScopedSchedulerLock lock(kernel); + for (auto* cur_node = thread_list_head; cur_node != nullptr; cur_node = cur_node->next) { + threads.emplace_back(cur_node->thread); + } + } + + return threads; +} +} // namespace Kernel diff --git a/src/core/hle/kernel/k_synchronization_object.h b/src/core/hle/kernel/k_synchronization_object.h new file mode 100644 index 000000000..14d80ebf1 --- /dev/null +++ b/src/core/hle/kernel/k_synchronization_object.h @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "core/hle/kernel/object.h" +#include "core/hle/result.h" + +namespace Kernel { + +class KernelCore; +class Synchronization; +class Thread; + +/// Class that represents a Kernel object that a thread can be waiting on +class KSynchronizationObject : public Object { +public: + struct ThreadListNode { + ThreadListNode* next{}; + Thread* thread{}; + }; + + [[nodiscard]] static ResultCode Wait(KernelCore& kernel, s32* out_index, + KSynchronizationObject** objects, const s32 num_objects, + s64 timeout); + + [[nodiscard]] virtual bool IsSignaled() const = 0; + + [[nodiscard]] std::vector<Thread*> GetWaitingThreadsForDebugging() const; + +protected: + explicit KSynchronizationObject(KernelCore& kernel); + virtual ~KSynchronizationObject(); + + void NotifyAvailable(ResultCode result); + void NotifyAvailable() { + return this->NotifyAvailable(RESULT_SUCCESS); + } + +private: + ThreadListNode* thread_list_head{}; + ThreadListNode* thread_list_tail{}; +}; + +// Specialization of DynamicObjectCast for KSynchronizationObjects +template <> +inline std::shared_ptr<KSynchronizationObject> DynamicObjectCast<KSynchronizationObject>( + std::shared_ptr<Object> object) { + if (object != nullptr && object->IsWaitable()) { + return std::static_pointer_cast<KSynchronizationObject>(object); + } + return nullptr; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index e8ece8164..c0ff287a6 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -38,7 +38,6 @@ #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/service_thread.h" #include "core/hle/kernel/shared_memory.h" -#include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" #include "core/hle/lock.h" @@ -51,8 +50,7 @@ namespace Kernel { struct KernelCore::Impl { explicit Impl(Core::System& system, KernelCore& kernel) - : synchronization{system}, time_manager{system}, global_handle_table{kernel}, system{ - system} {} + : time_manager{system}, global_handle_table{kernel}, system{system} {} void SetMulticore(bool is_multicore) { this->is_multicore = is_multicore; @@ -307,7 +305,6 @@ struct KernelCore::Impl { std::vector<std::shared_ptr<Process>> process_list; Process* current_process = nullptr; std::unique_ptr<Kernel::GlobalSchedulerContext> global_scheduler_context; - Kernel::Synchronization synchronization; Kernel::TimeManager time_manager; std::shared_ptr<ResourceLimit> system_resource_limit; @@ -461,14 +458,6 @@ const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Kern return impl->interrupts; } -Kernel::Synchronization& KernelCore::Synchronization() { - return impl->synchronization; -} - -const Kernel::Synchronization& KernelCore::Synchronization() const { - return impl->synchronization; -} - Kernel::TimeManager& KernelCore::TimeManager() { return impl->time_manager; } @@ -613,9 +602,11 @@ void KernelCore::Suspend(bool in_suspention) { const bool should_suspend = exception_exited || in_suspention; { KScopedSchedulerLock lock(*this); - ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep; + const auto state = should_suspend ? ThreadState::Runnable : ThreadState::Waiting; for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - impl->suspend_threads[i]->SetStatus(status); + impl->suspend_threads[i]->SetState(state); + impl->suspend_threads[i]->SetWaitReasonForDebugging( + ThreadWaitReasonForDebugging::Suspended); } } } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index e3169f5a7..933d9a7d6 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -33,7 +33,6 @@ template <typename T> class SlabHeap; } // namespace Memory -class AddressArbiter; class ClientPort; class GlobalSchedulerContext; class HandleTable; @@ -129,12 +128,6 @@ public: /// Gets the an instance of the current physical CPU core. const Kernel::PhysicalCore& CurrentPhysicalCore() const; - /// Gets the an instance of the Synchronization Interface. - Kernel::Synchronization& Synchronization(); - - /// Gets the an instance of the Synchronization Interface. - const Kernel::Synchronization& Synchronization() const; - /// Gets the an instance of the TimeManager Interface. Kernel::TimeManager& TimeManager(); diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/memory/address_space_info.cpp index e4288cab4..6cf43ba24 100644 --- a/src/core/hle/kernel/memory/address_space_info.cpp +++ b/src/core/hle/kernel/memory/address_space_info.cpp @@ -96,6 +96,7 @@ u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) { return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].address; } UNREACHABLE(); + return 0; } std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) { @@ -112,6 +113,7 @@ std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].size; } UNREACHABLE(); + return 0; } } // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_layout.h b/src/core/hle/kernel/memory/memory_layout.h index 9b3d6267a..c7c0b2f49 100644 --- a/src/core/hle/kernel/memory/memory_layout.h +++ b/src/core/hle/kernel/memory/memory_layout.h @@ -5,9 +5,28 @@ #pragma once #include "common/common_types.h" +#include "core/device_memory.h" namespace Kernel::Memory { +constexpr std::size_t KernelAslrAlignment = 2 * 1024 * 1024; +constexpr std::size_t KernelVirtualAddressSpaceWidth = 1ULL << 39; +constexpr std::size_t KernelPhysicalAddressSpaceWidth = 1ULL << 48; +constexpr std::size_t KernelVirtualAddressSpaceBase = 0ULL - KernelVirtualAddressSpaceWidth; +constexpr std::size_t KernelVirtualAddressSpaceEnd = + KernelVirtualAddressSpaceBase + (KernelVirtualAddressSpaceWidth - KernelAslrAlignment); +constexpr std::size_t KernelVirtualAddressSpaceLast = KernelVirtualAddressSpaceEnd - 1; +constexpr std::size_t KernelVirtualAddressSpaceSize = + KernelVirtualAddressSpaceEnd - KernelVirtualAddressSpaceBase; + +constexpr bool IsKernelAddressKey(VAddr key) { + return KernelVirtualAddressSpaceBase <= key && key <= KernelVirtualAddressSpaceLast; +} + +constexpr bool IsKernelAddress(VAddr address) { + return KernelVirtualAddressSpaceBase <= address && address < KernelVirtualAddressSpaceEnd; +} + class MemoryRegion final { friend class MemoryLayout; diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp deleted file mode 100644 index 4f8075e0e..000000000 --- a/src/core/hle/kernel/mutex.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <memory> -#include <utility> -#include <vector> - -#include "common/assert.h" -#include "common/logging/log.h" -#include "core/core.h" -#include "core/hle/kernel/errors.h" -#include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/k_scheduler.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/mutex.h" -#include "core/hle/kernel/object.h" -#include "core/hle/kernel/process.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/result.h" -#include "core/memory.h" - -namespace Kernel { - -/// Returns the number of threads that are waiting for a mutex, and the highest priority one among -/// those. -static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThread( - const std::shared_ptr<Thread>& current_thread, VAddr mutex_addr) { - - std::shared_ptr<Thread> highest_priority_thread; - u32 num_waiters = 0; - - for (const auto& thread : current_thread->GetMutexWaitingThreads()) { - if (thread->GetMutexWaitAddress() != mutex_addr) - continue; - - ++num_waiters; - if (highest_priority_thread == nullptr || - thread->GetPriority() < highest_priority_thread->GetPriority()) { - highest_priority_thread = thread; - } - } - - return {highest_priority_thread, num_waiters}; -} - -/// Update the mutex owner field of all threads waiting on the mutex to point to the new owner. -static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread, - std::shared_ptr<Thread> new_owner) { - current_thread->RemoveMutexWaiter(new_owner); - const auto threads = current_thread->GetMutexWaitingThreads(); - for (const auto& thread : threads) { - if (thread->GetMutexWaitAddress() != mutex_addr) - continue; - - ASSERT(thread->GetLockOwner() == current_thread.get()); - current_thread->RemoveMutexWaiter(thread); - if (new_owner != thread) - new_owner->AddMutexWaiter(thread); - } -} - -Mutex::Mutex(Core::System& system) : system{system} {} -Mutex::~Mutex() = default; - -ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle, - Handle requesting_thread_handle) { - // The mutex address must be 4-byte aligned - if ((address % sizeof(u32)) != 0) { - LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); - return ERR_INVALID_ADDRESS; - } - - auto& kernel = system.Kernel(); - std::shared_ptr<Thread> current_thread = - SharedFrom(kernel.CurrentScheduler()->GetCurrentThread()); - { - KScopedSchedulerLock lock(kernel); - // The mutex address must be 4-byte aligned - if ((address % sizeof(u32)) != 0) { - return ERR_INVALID_ADDRESS; - } - - const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); - std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); - std::shared_ptr<Thread> requesting_thread = - handle_table.Get<Thread>(requesting_thread_handle); - - // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of - // another thread. - ASSERT(requesting_thread == current_thread); - - current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS); - - const u32 addr_value = system.Memory().Read32(address); - - // If the mutex isn't being held, just return success. - if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { - return RESULT_SUCCESS; - } - - if (holding_thread == nullptr) { - return ERR_INVALID_HANDLE; - } - - // Wait until the mutex is released - current_thread->SetMutexWaitAddress(address); - current_thread->SetWaitHandle(requesting_thread_handle); - - current_thread->SetStatus(ThreadStatus::WaitMutex); - - // Update the lock holder thread's priority to prevent priority inversion. - holding_thread->AddMutexWaiter(current_thread); - } - - { - KScopedSchedulerLock lock(kernel); - auto* owner = current_thread->GetLockOwner(); - if (owner != nullptr) { - owner->RemoveMutexWaiter(current_thread); - } - } - return current_thread->GetSignalingResult(); -} - -std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner, - VAddr address) { - // The mutex address must be 4-byte aligned - if ((address % sizeof(u32)) != 0) { - LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); - return {ERR_INVALID_ADDRESS, nullptr}; - } - - auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address); - if (new_owner == nullptr) { - system.Memory().Write32(address, 0); - return {RESULT_SUCCESS, nullptr}; - } - // Transfer the ownership of the mutex from the previous owner to the new one. - TransferMutexOwnership(address, owner, new_owner); - u32 mutex_value = new_owner->GetWaitHandle(); - if (num_waiters >= 2) { - // Notify the guest that there are still some threads waiting for the mutex - mutex_value |= Mutex::MutexHasWaitersFlag; - } - new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS); - new_owner->SetLockOwner(nullptr); - new_owner->ResumeFromWait(); - - system.Memory().Write32(address, mutex_value); - return {RESULT_SUCCESS, new_owner}; -} - -ResultCode Mutex::Release(VAddr address) { - auto& kernel = system.Kernel(); - KScopedSchedulerLock lock(kernel); - - std::shared_ptr<Thread> current_thread = - SharedFrom(kernel.CurrentScheduler()->GetCurrentThread()); - - auto [result, new_owner] = Unlock(current_thread, address); - - if (result != RESULT_SUCCESS && new_owner != nullptr) { - new_owner->SetSynchronizationResults(nullptr, result); - } - - return result; -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h deleted file mode 100644 index 3b81dc3df..000000000 --- a/src/core/hle/kernel/mutex.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -union ResultCode; - -namespace Core { -class System; -} - -namespace Kernel { - -class Mutex final { -public: - explicit Mutex(Core::System& system); - ~Mutex(); - - /// Flag that indicates that a mutex still has threads waiting for it. - static constexpr u32 MutexHasWaitersFlag = 0x40000000; - /// Mask of the bits in a mutex address value that contain the mutex owner. - static constexpr u32 MutexOwnerMask = 0xBFFFFFFF; - - /// Attempts to acquire a mutex at the specified address. - ResultCode TryAcquire(VAddr address, Handle holding_thread_handle, - Handle requesting_thread_handle); - - /// Unlocks a mutex for owner at address - std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner, - VAddr address); - - /// Releases the mutex at the specified address. - ResultCode Release(VAddr address); - -private: - Core::System& system; -}; - -} // namespace Kernel diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index e3391e2af..27124ef67 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h @@ -50,6 +50,11 @@ public: } virtual HandleType GetHandleType() const = 0; + void Close() { + // TODO(bunnei): This is a placeholder to decrement the reference count, which we will use + // when we implement KAutoObject instead of using shared_ptr. + } + /** * Check if a thread can wait on the object * @return True if a thread can wait on the object, otherwise false diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index b905b486a..37b77fa6e 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -55,7 +55,7 @@ void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires { KScopedSchedulerLock lock{kernel}; - thread->SetStatus(ThreadStatus::Ready); + thread->SetState(ThreadState::Runnable); } } } // Anonymous namespace @@ -162,48 +162,6 @@ u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage(); } -void Process::InsertConditionVariableThread(std::shared_ptr<Thread> thread) { - VAddr cond_var_addr = thread->GetCondVarWaitAddress(); - std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread> current_thread = *it; - if (current_thread->GetPriority() > thread->GetPriority()) { - thread_list.insert(it, thread); - return; - } - ++it; - } - thread_list.push_back(thread); -} - -void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) { - VAddr cond_var_addr = thread->GetCondVarWaitAddress(); - std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread> current_thread = *it; - if (current_thread.get() == thread.get()) { - thread_list.erase(it); - return; - } - ++it; - } -} - -std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads( - const VAddr cond_var_addr) { - std::vector<std::shared_ptr<Thread>> result{}; - std::list<std::shared_ptr<Thread>>& thread_list = cond_var_threads[cond_var_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - std::shared_ptr<Thread> current_thread = *it; - result.push_back(current_thread); - ++it; - } - return result; -} - void Process::RegisterThread(const Thread* thread) { thread_list.push_back(thread); } @@ -318,7 +276,7 @@ void Process::PrepareForTermination() { continue; // TODO(Subv): When are the other running/ready threads terminated? - ASSERT_MSG(thread->GetStatus() == ThreadStatus::WaitSynch, + ASSERT_MSG(thread->GetState() == ThreadState::Waiting, "Exiting processes with non-waiting threads is currently unimplemented"); thread->Stop(); @@ -406,21 +364,18 @@ void Process::LoadModule(CodeSet code_set, VAddr base_addr) { ReprotectSegment(code_set.DataSegment(), Memory::MemoryPermission::ReadAndWrite); } +bool Process::IsSignaled() const { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + return is_signaled; +} + Process::Process(Core::System& system) - : SynchronizationObject{system.Kernel()}, page_table{std::make_unique<Memory::PageTable>( - system)}, - handle_table{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {} + : KSynchronizationObject{system.Kernel()}, + page_table{std::make_unique<Memory::PageTable>(system)}, handle_table{system.Kernel()}, + address_arbiter{system}, condition_var{system}, system{system} {} Process::~Process() = default; -void Process::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); -} - -bool Process::ShouldWait(const Thread* thread) const { - return !is_signaled; -} - void Process::ChangeStatus(ProcessStatus new_status) { if (status == new_status) { return; @@ -428,7 +383,7 @@ void Process::ChangeStatus(ProcessStatus new_status) { status = new_status; is_signaled = true; - Signal(); + NotifyAvailable(); } ResultCode Process::AllocateMainThreadStack(std::size_t stack_size) { diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index e412e58aa..564e1f27d 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -11,11 +11,11 @@ #include <unordered_map> #include <vector> #include "common/common_types.h" -#include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/mutex.h" +#include "core/hle/kernel/k_address_arbiter.h" +#include "core/hle/kernel/k_condition_variable.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/process_capability.h" -#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Core { @@ -63,7 +63,7 @@ enum class ProcessStatus { DebugBreak, }; -class Process final : public SynchronizationObject { +class Process final : public KSynchronizationObject { public: explicit Process(Core::System& system); ~Process() override; @@ -123,24 +123,30 @@ public: return handle_table; } - /// Gets a reference to the process' address arbiter. - AddressArbiter& GetAddressArbiter() { - return address_arbiter; + ResultCode SignalToAddress(VAddr address) { + return condition_var.SignalToAddress(address); } - /// Gets a const reference to the process' address arbiter. - const AddressArbiter& GetAddressArbiter() const { - return address_arbiter; + ResultCode WaitForAddress(Handle handle, VAddr address, u32 tag) { + return condition_var.WaitForAddress(handle, address, tag); } - /// Gets a reference to the process' mutex lock. - Mutex& GetMutex() { - return mutex; + void SignalConditionVariable(u64 cv_key, int32_t count) { + return condition_var.Signal(cv_key, count); } - /// Gets a const reference to the process' mutex lock - const Mutex& GetMutex() const { - return mutex; + ResultCode WaitConditionVariable(VAddr address, u64 cv_key, u32 tag, s64 ns) { + return condition_var.Wait(address, cv_key, tag, ns); + } + + ResultCode SignalAddressArbiter(VAddr address, Svc::SignalType signal_type, s32 value, + s32 count) { + return address_arbiter.SignalToAddress(address, signal_type, value, count); + } + + ResultCode WaitAddressArbiter(VAddr address, Svc::ArbitrationType arb_type, s32 value, + s64 timeout) { + return address_arbiter.WaitForAddress(address, arb_type, value, timeout); } /// Gets the address to the process' dedicated TLS region. @@ -250,15 +256,6 @@ public: return thread_list; } - /// Insert a thread into the condition variable wait container - void InsertConditionVariableThread(std::shared_ptr<Thread> thread); - - /// Remove a thread from the condition variable wait container - void RemoveConditionVariableThread(std::shared_ptr<Thread> thread); - - /// Obtain all condition variable threads waiting for some address - std::vector<std::shared_ptr<Thread>> GetConditionVariableThreads(VAddr cond_var_addr); - /// Registers a thread as being created under this process, /// adding it to this process' thread list. void RegisterThread(const Thread* thread); @@ -304,6 +301,8 @@ public: void LoadModule(CodeSet code_set, VAddr base_addr); + bool IsSignaled() const override; + /////////////////////////////////////////////////////////////////////////////////////////////// // Thread-local storage management @@ -314,12 +313,6 @@ public: void FreeTLSRegion(VAddr tls_address); private: - /// Checks if the specified thread should wait until this process is available. - bool ShouldWait(const Thread* thread) const override; - - /// Acquires/locks this process for the specified thread if it's available. - void Acquire(Thread* thread) override; - /// Changes the process status. If the status is different /// from the current process status, then this will trigger /// a process signal. @@ -373,12 +366,12 @@ private: HandleTable handle_table; /// Per-process address arbiter. - AddressArbiter address_arbiter; + KAddressArbiter address_arbiter; /// The per-process mutex lock instance used for handling various /// forms of services, such as lock arbitration, and condition /// variable related facilities. - Mutex mutex; + KConditionVariable condition_var; /// Address indicating the location of the process' dedicated TLS region. VAddr tls_region_address = 0; @@ -389,9 +382,6 @@ private: /// List of threads that are running with this process as their owner. std::list<const Thread*> thread_list; - /// List of threads waiting for a condition variable - std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> cond_var_threads; - /// Address of the top of the main thread's stack VAddr main_thread_stack_top{}; @@ -410,6 +400,8 @@ private: /// Schedule count of this process s64 schedule_count{}; + bool is_signaled{}; + /// System context Core::System& system; }; diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index cea262ce0..99ed0857e 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -14,24 +14,22 @@ namespace Kernel { -ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {} +ReadableEvent::ReadableEvent(KernelCore& kernel) : KSynchronizationObject{kernel} {} ReadableEvent::~ReadableEvent() = default; -bool ReadableEvent::ShouldWait(const Thread* thread) const { - return !is_signaled; -} - -void ReadableEvent::Acquire(Thread* thread) { - ASSERT_MSG(IsSignaled(), "object unavailable!"); -} - void ReadableEvent::Signal() { if (is_signaled) { return; } is_signaled = true; - SynchronizationObject::Signal(); + NotifyAvailable(); +} + +bool ReadableEvent::IsSignaled() const { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + return is_signaled; } void ReadableEvent::Clear() { diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h index 3264dd066..34e477274 100644 --- a/src/core/hle/kernel/readable_event.h +++ b/src/core/hle/kernel/readable_event.h @@ -4,8 +4,8 @@ #pragma once +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/synchronization_object.h" union ResultCode; @@ -14,7 +14,7 @@ namespace Kernel { class KernelCore; class WritableEvent; -class ReadableEvent final : public SynchronizationObject { +class ReadableEvent final : public KSynchronizationObject { friend class WritableEvent; public: @@ -32,9 +32,6 @@ public: return HANDLE_TYPE; } - bool ShouldWait(const Thread* thread) const override; - void Acquire(Thread* thread) override; - /// Unconditionally clears the readable event's state. void Clear(); @@ -46,11 +43,14 @@ public: /// then ERR_INVALID_STATE will be returned. ResultCode Reset(); - void Signal() override; + void Signal(); + + bool IsSignaled() const override; private: explicit ReadableEvent(KernelCore& kernel); + bool is_signaled{}; std::string name; ///< Name of event (optional) }; diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index a549ae9d7..82857f93b 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {} +ServerPort::ServerPort(KernelCore& kernel) : KSynchronizationObject{kernel} {} ServerPort::~ServerPort() = default; ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { @@ -28,15 +28,9 @@ ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { void ServerPort::AppendPendingSession(std::shared_ptr<ServerSession> pending_session) { pending_sessions.push_back(std::move(pending_session)); -} - -bool ServerPort::ShouldWait(const Thread* thread) const { - // If there are no pending sessions, we wait until a new one is added. - return pending_sessions.empty(); -} - -void ServerPort::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); + if (pending_sessions.size() == 1) { + NotifyAvailable(); + } } bool ServerPort::IsSignaled() const { diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h index 41b191b86..6470df993 100644 --- a/src/core/hle/kernel/server_port.h +++ b/src/core/hle/kernel/server_port.h @@ -9,8 +9,8 @@ #include <utility> #include <vector> #include "common/common_types.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Kernel { @@ -20,7 +20,7 @@ class KernelCore; class ServerSession; class SessionRequestHandler; -class ServerPort final : public SynchronizationObject { +class ServerPort final : public KSynchronizationObject { public: explicit ServerPort(KernelCore& kernel); ~ServerPort() override; @@ -79,9 +79,6 @@ public: /// waiting to be accepted by this port. void AppendPendingSession(std::shared_ptr<ServerSession> pending_session); - bool ShouldWait(const Thread* thread) const override; - void Acquire(Thread* thread) override; - bool IsSignaled() const override; private: diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index b40fe3916..4f2bb7822 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -24,7 +24,7 @@ namespace Kernel { -ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} +ServerSession::ServerSession(KernelCore& kernel) : KSynchronizationObject{kernel} {} ServerSession::~ServerSession() { kernel.ReleaseServiceThread(service_thread); @@ -42,16 +42,6 @@ ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kern return MakeResult(std::move(session)); } -bool ServerSession::ShouldWait(const Thread* thread) const { - // Closed sessions should never wait, an error will be returned from svcReplyAndReceive. - if (!parent->Client()) { - return false; - } - - // Wait if we have no pending requests, or if we're currently handling a request. - return pending_requesting_threads.empty() || currently_handling != nullptr; -} - bool ServerSession::IsSignaled() const { // Closed sessions should never wait, an error will be returned from svcReplyAndReceive. if (!parent->Client()) { @@ -62,15 +52,6 @@ bool ServerSession::IsSignaled() const { return !pending_requesting_threads.empty() && currently_handling == nullptr; } -void ServerSession::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); - // We are now handling a request, pop it from the stack. - // TODO(Subv): What happens if the client endpoint is closed before any requests are made? - ASSERT(!pending_requesting_threads.empty()); - currently_handling = pending_requesting_threads.back(); - pending_requesting_threads.pop_back(); -} - void ServerSession::ClientDisconnected() { // We keep a shared pointer to the hle handler to keep it alive throughout // the call to ClientDisconnected, as ClientDisconnected invalidates the @@ -172,7 +153,7 @@ ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) { { KScopedSchedulerLock lock(kernel); if (!context.IsThreadWaiting()) { - context.GetThread().ResumeFromWait(); + context.GetThread().Wakeup(); context.GetThread().SetSynchronizationResults(nullptr, result); } } diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index e8d1d99ea..9155cf7f5 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -10,8 +10,8 @@ #include <vector> #include "common/threadsafe_queue.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/service_thread.h" -#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Core::Memory { @@ -43,7 +43,7 @@ class Thread; * After the server replies to the request, the response is marshalled back to the caller's * TLS buffer and control is transferred back to it. */ -class ServerSession final : public SynchronizationObject { +class ServerSession final : public KSynchronizationObject { friend class ServiceThread; public: @@ -77,8 +77,6 @@ public: return parent.get(); } - bool IsSignaled() const override; - /** * Sets the HLE handler for the session. This handler will be called to service IPC requests * instead of the regular IPC machinery. (The regular IPC machinery is currently not @@ -100,10 +98,6 @@ public: ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory, Core::Timing::CoreTiming& core_timing); - bool ShouldWait(const Thread* thread) const override; - - void Acquire(Thread* thread) override; - /// Called when a client disconnection occurs. void ClientDisconnected(); @@ -130,6 +124,8 @@ public: convert_to_domain = true; } + bool IsSignaled() const override; + private: /// Queues a sync request from the emulated application. ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp index e4dd53e24..75304b961 100644 --- a/src/core/hle/kernel/session.cpp +++ b/src/core/hle/kernel/session.cpp @@ -9,7 +9,7 @@ namespace Kernel { -Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {} +Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {} Session::~Session() = default; Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { @@ -24,18 +24,9 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { return std::make_pair(std::move(client_session), std::move(server_session)); } -bool Session::ShouldWait(const Thread* thread) const { - UNIMPLEMENTED(); - return {}; -} - bool Session::IsSignaled() const { UNIMPLEMENTED(); return true; } -void Session::Acquire(Thread* thread) { - UNIMPLEMENTED(); -} - } // namespace Kernel diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h index 7cd9c0d77..f6dd2c1d2 100644 --- a/src/core/hle/kernel/session.h +++ b/src/core/hle/kernel/session.h @@ -8,7 +8,7 @@ #include <string> #include <utility> -#include "core/hle/kernel/synchronization_object.h" +#include "core/hle/kernel/k_synchronization_object.h" namespace Kernel { @@ -19,7 +19,7 @@ class ServerSession; * Parent structure to link the client and server endpoints of a session with their associated * client port. */ -class Session final : public SynchronizationObject { +class Session final : public KSynchronizationObject { public: explicit Session(KernelCore& kernel); ~Session() override; @@ -37,12 +37,8 @@ public: return HANDLE_TYPE; } - bool ShouldWait(const Thread* thread) const override; - bool IsSignaled() const override; - void Acquire(Thread* thread) override; - std::shared_ptr<ClientSession> Client() { if (auto result{client.lock()}) { return result; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index de3ed25da..cc8b661af 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -10,6 +10,7 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/common_funcs.h" #include "common/fiber.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -19,26 +20,28 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/cpu_manager.h" -#include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_address_arbiter.h" +#include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/memory/memory_layout.h" #include "core/hle/kernel/memory/page_table.h" -#include "core/hle/kernel/mutex.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/svc.h" +#include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/svc_types.h" #include "core/hle/kernel/svc_wrap.h" -#include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" #include "core/hle/kernel/transfer_memory.h" @@ -343,27 +346,11 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { auto thread = kernel.CurrentScheduler()->GetCurrentThread(); { KScopedSchedulerLock lock(kernel); - thread->InvalidateHLECallback(); - thread->SetStatus(ThreadStatus::WaitIPC); + thread->SetState(ThreadState::Waiting); + thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC); session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming()); } - if (thread->HasHLECallback()) { - Handle event_handle = thread->GetHLETimeEvent(); - if (event_handle != InvalidHandle) { - auto& time_manager = kernel.TimeManager(); - time_manager.UnscheduleTimeEvent(event_handle); - } - - { - KScopedSchedulerLock lock(kernel); - auto* sync_object = thread->GetHLESyncObject(); - sync_object->RemoveWaitingThread(SharedFrom(thread)); - } - - thread->InvokeHLECallback(SharedFrom(thread)); - } - return thread->GetSignalingResult(); } @@ -436,7 +423,7 @@ static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* } /// Wait for the given handles to synchronize, timeout after the specified nanoseconds -static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address, +static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr handles_address, u64 handle_count, s64 nano_seconds) { LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}", handles_address, handle_count, nano_seconds); @@ -458,28 +445,26 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr } auto& kernel = system.Kernel(); - Thread::ThreadSynchronizationObjects objects(handle_count); + std::vector<KSynchronizationObject*> objects(handle_count); const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); for (u64 i = 0; i < handle_count; ++i) { const Handle handle = memory.Read32(handles_address + i * sizeof(Handle)); - const auto object = handle_table.Get<SynchronizationObject>(handle); + const auto object = handle_table.Get<KSynchronizationObject>(handle); if (object == nullptr) { LOG_ERROR(Kernel_SVC, "Object is a nullptr"); return ERR_INVALID_HANDLE; } - objects[i] = object; + objects[i] = object.get(); } - auto& synchronization = kernel.Synchronization(); - const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds); - *index = handle_result; - return result; + return KSynchronizationObject::Wait(kernel, index, objects.data(), + static_cast<s32>(objects.size()), nano_seconds); } static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address, - s32 handle_count, u32 timeout_high, Handle* index) { + s32 handle_count, u32 timeout_high, s32* index) { const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)}; return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds); } @@ -504,56 +489,37 @@ static ResultCode CancelSynchronization32(Core::System& system, Handle thread_ha return CancelSynchronization(system, thread_handle); } -/// Attempts to locks a mutex, creating it if it does not already exist -static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle, - VAddr mutex_addr, Handle requesting_thread_handle) { - LOG_TRACE(Kernel_SVC, - "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, " - "requesting_current_thread_handle=0x{:08X}", - holding_thread_handle, mutex_addr, requesting_thread_handle); - - if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) { - LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}", - mutex_addr); - return ERR_INVALID_ADDRESS_STATE; - } +/// Attempts to locks a mutex +static ResultCode ArbitrateLock(Core::System& system, Handle thread_handle, VAddr address, + u32 tag) { + LOG_TRACE(Kernel_SVC, "called thread_handle=0x{:08X}, address=0x{:X}, tag=0x{:08X}", + thread_handle, address, tag); - if (!Common::IsWordAligned(mutex_addr)) { - LOG_ERROR(Kernel_SVC, "Mutex Address is not word aligned, mutex_addr={:016X}", mutex_addr); - return ERR_INVALID_ADDRESS; - } + // Validate the input address. + R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory); + R_UNLESS(Common::IsAligned(address, sizeof(u32)), Svc::ResultInvalidAddress); - auto* const current_process = system.Kernel().CurrentProcess(); - return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle, - requesting_thread_handle); + return system.Kernel().CurrentProcess()->WaitForAddress(thread_handle, address, tag); } -static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle, - u32 mutex_addr, Handle requesting_thread_handle) { - return ArbitrateLock(system, holding_thread_handle, mutex_addr, requesting_thread_handle); +static ResultCode ArbitrateLock32(Core::System& system, Handle thread_handle, u32 address, + u32 tag) { + return ArbitrateLock(system, thread_handle, address, tag); } /// Unlock a mutex -static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { - LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); - - if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) { - LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}", - mutex_addr); - return ERR_INVALID_ADDRESS_STATE; - } +static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) { + LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address); - if (!Common::IsWordAligned(mutex_addr)) { - LOG_ERROR(Kernel_SVC, "Mutex Address is not word aligned, mutex_addr={:016X}", mutex_addr); - return ERR_INVALID_ADDRESS; - } + // Validate the input address. + R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory); + R_UNLESS(Common::IsAligned(address, sizeof(u32)), Svc::ResultInvalidAddress); - auto* const current_process = system.Kernel().CurrentProcess(); - return current_process->GetMutex().Release(mutex_addr); + return system.Kernel().CurrentProcess()->SignalToAddress(address); } -static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) { - return ArbitrateUnlock(system, mutex_addr); +static ResultCode ArbitrateUnlock32(Core::System& system, u32 address) { + return ArbitrateUnlock(system, address); } enum class BreakType : u32 { @@ -1180,7 +1146,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri return ERR_INVALID_HANDLE; } - thread->SetPriority(priority); + thread->SetBasePriority(priority); return RESULT_SUCCESS; } @@ -1559,7 +1525,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { return ERR_INVALID_HANDLE; } - ASSERT(thread->GetStatus() == ThreadStatus::Dormant); + ASSERT(thread->GetState() == ThreadState::Initialized); return thread->Start(); } @@ -1620,224 +1586,135 @@ static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanosec } /// Wait process wide key atomic -static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr, - VAddr condition_variable_addr, Handle thread_handle, - s64 nano_seconds) { - LOG_TRACE( - Kernel_SVC, - "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}", - mutex_addr, condition_variable_addr, thread_handle, nano_seconds); - - if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) { - LOG_ERROR( - Kernel_SVC, - "Given mutex address must not be within the kernel address space. address=0x{:016X}", - mutex_addr); - return ERR_INVALID_ADDRESS_STATE; - } - - if (!Common::IsWordAligned(mutex_addr)) { - LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}", - mutex_addr); - return ERR_INVALID_ADDRESS; - } - - ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); - auto& kernel = system.Kernel(); - Handle event_handle; - Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread(); - auto* const current_process = kernel.CurrentProcess(); - { - KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds); - const auto& handle_table = current_process->GetHandleTable(); - std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); - ASSERT(thread); - - current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); - - if (thread->IsPendingTermination()) { - lock.CancelSleep(); - return ERR_THREAD_TERMINATING; - } - - const auto release_result = current_process->GetMutex().Release(mutex_addr); - if (release_result.IsError()) { - lock.CancelSleep(); - return release_result; - } - - if (nano_seconds == 0) { - lock.CancelSleep(); - return RESULT_TIMEOUT; - } - - current_thread->SetCondVarWaitAddress(condition_variable_addr); - current_thread->SetMutexWaitAddress(mutex_addr); - current_thread->SetWaitHandle(thread_handle); - current_thread->SetStatus(ThreadStatus::WaitCondVar); - current_process->InsertConditionVariableThread(SharedFrom(current_thread)); - } - - if (event_handle != InvalidHandle) { - auto& time_manager = kernel.TimeManager(); - time_manager.UnscheduleTimeEvent(event_handle); - } - - { - KScopedSchedulerLock lock(kernel); - - auto* owner = current_thread->GetLockOwner(); - if (owner != nullptr) { - owner->RemoveMutexWaiter(SharedFrom(current_thread)); +static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr address, VAddr cv_key, + u32 tag, s64 timeout_ns) { + LOG_TRACE(Kernel_SVC, "called address={:X}, cv_key={:X}, tag=0x{:08X}, timeout_ns={}", address, + cv_key, tag, timeout_ns); + + // Validate input. + R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory); + R_UNLESS(Common::IsAligned(address, sizeof(int32_t)), Svc::ResultInvalidAddress); + + // Convert timeout from nanoseconds to ticks. + s64 timeout{}; + if (timeout_ns > 0) { + const s64 offset_tick(timeout_ns); + if (offset_tick > 0) { + timeout = offset_tick + 2; + if (timeout <= 0) { + timeout = std::numeric_limits<s64>::max(); + } + } else { + timeout = std::numeric_limits<s64>::max(); } - - current_process->RemoveConditionVariableThread(SharedFrom(current_thread)); + } else { + timeout = timeout_ns; } - // Note: Deliberately don't attempt to inherit the lock owner's priority. - return current_thread->GetSignalingResult(); + // Wait on the condition variable. + return system.Kernel().CurrentProcess()->WaitConditionVariable( + address, Common::AlignDown(cv_key, sizeof(u32)), tag, timeout); } -static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr, - u32 condition_variable_addr, Handle thread_handle, - u32 nanoseconds_low, u32 nanoseconds_high) { - const auto nanoseconds = static_cast<s64>(nanoseconds_low | (u64{nanoseconds_high} << 32)); - return WaitProcessWideKeyAtomic(system, mutex_addr, condition_variable_addr, thread_handle, - nanoseconds); +static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 address, u32 cv_key, u32 tag, + u32 timeout_ns_low, u32 timeout_ns_high) { + const auto timeout_ns = static_cast<s64>(timeout_ns_low | (u64{timeout_ns_high} << 32)); + return WaitProcessWideKeyAtomic(system, address, cv_key, tag, timeout_ns); } /// Signal process wide key -static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) { - LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", - condition_variable_addr, target); +static void SignalProcessWideKey(Core::System& system, VAddr cv_key, s32 count) { + LOG_TRACE(Kernel_SVC, "called, cv_key=0x{:X}, count=0x{:08X}", cv_key, count); - ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); + // Signal the condition variable. + return system.Kernel().CurrentProcess()->SignalConditionVariable( + Common::AlignDown(cv_key, sizeof(u32)), count); +} - // Retrieve a list of all threads that are waiting for this condition variable. - auto& kernel = system.Kernel(); - KScopedSchedulerLock lock(kernel); - auto* const current_process = kernel.CurrentProcess(); - std::vector<std::shared_ptr<Thread>> waiting_threads = - current_process->GetConditionVariableThreads(condition_variable_addr); - - // Only process up to 'target' threads, unless 'target' is less equal 0, in which case process - // them all. - std::size_t last = waiting_threads.size(); - if (target > 0) { - last = std::min(waiting_threads.size(), static_cast<std::size_t>(target)); - } - for (std::size_t index = 0; index < last; ++index) { - auto& thread = waiting_threads[index]; - - ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr); - - // liberate Cond Var Thread. - current_process->RemoveConditionVariableThread(thread); - - const std::size_t current_core = system.CurrentCoreIndex(); - auto& monitor = system.Monitor(); - - // Atomically read the value of the mutex. - u32 mutex_val = 0; - u32 update_val = 0; - const VAddr mutex_address = thread->GetMutexWaitAddress(); - do { - // If the mutex is not yet acquired, acquire it. - mutex_val = monitor.ExclusiveRead32(current_core, mutex_address); - - if (mutex_val != 0) { - update_val = mutex_val | Mutex::MutexHasWaitersFlag; - } else { - update_val = thread->GetWaitHandle(); - } - } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val)); - monitor.ClearExclusive(); - if (mutex_val == 0) { - // We were able to acquire the mutex, resume this thread. - auto* const lock_owner = thread->GetLockOwner(); - if (lock_owner != nullptr) { - lock_owner->RemoveMutexWaiter(thread); - } +static void SignalProcessWideKey32(Core::System& system, u32 cv_key, s32 count) { + SignalProcessWideKey(system, cv_key, count); +} - thread->SetLockOwner(nullptr); - thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS); - thread->ResumeFromWait(); - } else { - // The mutex is already owned by some other thread, make this thread wait on it. - const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); - auto owner = handle_table.Get<Thread>(owner_handle); - ASSERT(owner); - if (thread->GetStatus() == ThreadStatus::WaitCondVar) { - thread->SetStatus(ThreadStatus::WaitMutex); - } +namespace { - owner->AddMutexWaiter(thread); - } +constexpr bool IsValidSignalType(Svc::SignalType type) { + switch (type) { + case Svc::SignalType::Signal: + case Svc::SignalType::SignalAndIncrementIfEqual: + case Svc::SignalType::SignalAndModifyByWaitingCountIfEqual: + return true; + default: + return false; } } -static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) { - SignalProcessWideKey(system, condition_variable_addr, target); +constexpr bool IsValidArbitrationType(Svc::ArbitrationType type) { + switch (type) { + case Svc::ArbitrationType::WaitIfLessThan: + case Svc::ArbitrationType::DecrementAndWaitIfLessThan: + case Svc::ArbitrationType::WaitIfEqual: + return true; + default: + return false; + } } -// Wait for an address (via Address Arbiter) -static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, - s64 timeout) { - LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address, - type, value, timeout); - - // If the passed address is a kernel virtual address, return invalid memory state. - if (Core::Memory::IsKernelVirtualAddress(address)) { - LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); - return ERR_INVALID_ADDRESS_STATE; - } +} // namespace - // If the address is not properly aligned to 4 bytes, return invalid address. - if (!Common::IsWordAligned(address)) { - LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address); - return ERR_INVALID_ADDRESS; +// Wait for an address (via Address Arbiter) +static ResultCode WaitForAddress(Core::System& system, VAddr address, Svc::ArbitrationType arb_type, + s32 value, s64 timeout_ns) { + LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, arb_type=0x{:X}, value=0x{:X}, timeout_ns={}", + address, arb_type, value, timeout_ns); + + // Validate input. + R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory); + R_UNLESS(Common::IsAligned(address, sizeof(int32_t)), Svc::ResultInvalidAddress); + R_UNLESS(IsValidArbitrationType(arb_type), Svc::ResultInvalidEnumValue); + + // Convert timeout from nanoseconds to ticks. + s64 timeout{}; + if (timeout_ns > 0) { + const s64 offset_tick(timeout_ns); + if (offset_tick > 0) { + timeout = offset_tick + 2; + if (timeout <= 0) { + timeout = std::numeric_limits<s64>::max(); + } + } else { + timeout = std::numeric_limits<s64>::max(); + } + } else { + timeout = timeout_ns; } - const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type); - auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); - const ResultCode result = - address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); - return result; + return system.Kernel().CurrentProcess()->WaitAddressArbiter(address, arb_type, value, timeout); } -static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value, - u32 timeout_low, u32 timeout_high) { - const auto timeout = static_cast<s64>(timeout_low | (u64{timeout_high} << 32)); - return WaitForAddress(system, address, type, value, timeout); +static ResultCode WaitForAddress32(Core::System& system, u32 address, Svc::ArbitrationType arb_type, + s32 value, u32 timeout_ns_low, u32 timeout_ns_high) { + const auto timeout = static_cast<s64>(timeout_ns_low | (u64{timeout_ns_high} << 32)); + return WaitForAddress(system, address, arb_type, value, timeout); } // Signals to an address (via Address Arbiter) -static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, - s32 num_to_wake) { - LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", - address, type, value, num_to_wake); - - // If the passed address is a kernel virtual address, return invalid memory state. - if (Core::Memory::IsKernelVirtualAddress(address)) { - LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); - return ERR_INVALID_ADDRESS_STATE; - } +static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::SignalType signal_type, + s32 value, s32 count) { + LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, signal_type=0x{:X}, value=0x{:X}, count=0x{:X}", + address, signal_type, value, count); - // If the address is not properly aligned to 4 bytes, return invalid address. - if (!Common::IsWordAligned(address)) { - LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address); - return ERR_INVALID_ADDRESS; - } + // Validate input. + R_UNLESS(!Memory::IsKernelAddress(address), Svc::ResultInvalidCurrentMemory); + R_UNLESS(Common::IsAligned(address, sizeof(s32)), Svc::ResultInvalidAddress); + R_UNLESS(IsValidSignalType(signal_type), Svc::ResultInvalidEnumValue); - const auto signal_type = static_cast<AddressArbiter::SignalType>(type); - auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); - return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake); + return system.Kernel().CurrentProcess()->SignalAddressArbiter(address, signal_type, value, + count); } -static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value, - s32 num_to_wake) { - return SignalToAddress(system, address, type, value, num_to_wake); +static ResultCode SignalToAddress32(Core::System& system, u32 address, Svc::SignalType signal_type, + s32 value, s32 count) { + return SignalToAddress(system, address, signal_type, value, count); } static void KernelDebug([[maybe_unused]] Core::System& system, diff --git a/src/core/hle/kernel/svc_common.h b/src/core/hle/kernel/svc_common.h new file mode 100644 index 000000000..4af049551 --- /dev/null +++ b/src/core/hle/kernel/svc_common.h @@ -0,0 +1,14 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Kernel::Svc { + +constexpr s32 ArgumentHandleCountMax = 0x40; +constexpr u32 HandleWaitMask{1u << 30}; + +} // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h new file mode 100644 index 000000000..78282f021 --- /dev/null +++ b/src/core/hle/kernel/svc_results.h @@ -0,0 +1,20 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hle/result.h" + +namespace Kernel::Svc { + +constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59}; +constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102}; +constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; +constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114}; +constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117}; +constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118}; +constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; +constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125}; + +} // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h index 11e1d8e2d..d623f7a50 100644 --- a/src/core/hle/kernel/svc_types.h +++ b/src/core/hle/kernel/svc_types.h @@ -65,4 +65,16 @@ struct MemoryInfo { u32 padding{}; }; +enum class SignalType : u32 { + Signal = 0, + SignalAndIncrementIfEqual = 1, + SignalAndModifyByWaitingCountIfEqual = 2, +}; + +enum class ArbitrationType : u32 { + WaitIfLessThan = 0, + DecrementAndWaitIfLessThan = 1, + WaitIfEqual = 2, +}; + } // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 0b6dd9df0..a32750ed7 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -7,6 +7,7 @@ #include "common/common_types.h" #include "core/arm/arm_interface.h" #include "core/core.h" +#include "core/hle/kernel/svc_types.h" #include "core/hle/result.h" namespace Kernel { @@ -215,9 +216,10 @@ void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); } -template <ResultCode func(Core::System&, u32*, u64, u64, s64)> +// Used by WaitSynchronization +template <ResultCode func(Core::System&, s32*, u64, u64, s64)> void SvcWrap64(Core::System& system) { - u32 param_1 = 0; + s32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) .raw; @@ -276,18 +278,22 @@ void SvcWrap64(Core::System& system) { FuncReturn(system, retval); } -template <ResultCode func(Core::System&, u64, u32, s32, s64)> +// Used by WaitForAddress +template <ResultCode func(Core::System&, u64, Svc::ArbitrationType, s32, s64)> void SvcWrap64(Core::System& system) { - FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), - static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) - .raw); + FuncReturn(system, + func(system, Param(system, 0), static_cast<Svc::ArbitrationType>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) + .raw); } -template <ResultCode func(Core::System&, u64, u32, s32, s32)> +// Used by SignalToAddress +template <ResultCode func(Core::System&, u64, Svc::SignalType, s32, s32)> void SvcWrap64(Core::System& system) { - FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), - static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) - .raw); + FuncReturn(system, + func(system, Param(system, 0), static_cast<Svc::SignalType>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) + .raw); } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -503,22 +509,23 @@ void SvcWrap32(Core::System& system) { } // Used by WaitForAddress32 -template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)> +template <ResultCode func(Core::System&, u32, Svc::ArbitrationType, s32, u32, u32)> void SvcWrap32(Core::System& system) { const u32 retval = func(system, static_cast<u32>(Param(system, 0)), - static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), - static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4))) + static_cast<Svc::ArbitrationType>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<u32>(Param(system, 3)), + static_cast<u32>(Param(system, 4))) .raw; FuncReturn(system, retval); } // Used by SignalToAddress32 -template <ResultCode func(Core::System&, u32, u32, s32, s32)> +template <ResultCode func(Core::System&, u32, Svc::SignalType, s32, s32)> void SvcWrap32(Core::System& system) { - const u32 retval = - func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), - static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) - .raw; + const u32 retval = func(system, static_cast<u32>(Param(system, 0)), + static_cast<Svc::SignalType>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) + .raw; FuncReturn(system, retval); } @@ -539,9 +546,9 @@ void SvcWrap32(Core::System& system) { } // Used by WaitSynchronization32 -template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)> +template <ResultCode func(Core::System&, u32, u32, s32, u32, s32*)> void SvcWrap32(Core::System& system) { - u32 param_1 = 0; + s32 param_1 = 0; const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2), Param32(system, 3), ¶m_1) .raw; diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp deleted file mode 100644 index d3f520ea2..000000000 --- a/src/core/hle/kernel/synchronization.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "core/core.h" -#include "core/hle/kernel/errors.h" -#include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/k_scheduler.h" -#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/synchronization.h" -#include "core/hle/kernel/synchronization_object.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/kernel/time_manager.h" - -namespace Kernel { - -Synchronization::Synchronization(Core::System& system) : system{system} {} - -void Synchronization::SignalObject(SynchronizationObject& obj) const { - auto& kernel = system.Kernel(); - KScopedSchedulerLock lock(kernel); - if (obj.IsSignaled()) { - for (auto thread : obj.GetWaitingThreads()) { - if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) { - if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); - ASSERT(thread->IsWaitingSync()); - } - thread->SetSynchronizationResults(&obj, RESULT_SUCCESS); - thread->ResumeFromWait(); - } - } - obj.ClearWaitingThreads(); - } -} - -std::pair<ResultCode, Handle> Synchronization::WaitFor( - std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { - auto& kernel = system.Kernel(); - auto* const thread = kernel.CurrentScheduler()->GetCurrentThread(); - Handle event_handle = InvalidHandle; - { - KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds); - const auto itr = - std::find_if(sync_objects.begin(), sync_objects.end(), - [thread](const std::shared_ptr<SynchronizationObject>& object) { - return object->IsSignaled(); - }); - - if (itr != sync_objects.end()) { - // We found a ready object, acquire it and set the result value - SynchronizationObject* object = itr->get(); - object->Acquire(thread); - const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); - lock.CancelSleep(); - return {RESULT_SUCCESS, index}; - } - - if (nano_seconds == 0) { - lock.CancelSleep(); - return {RESULT_TIMEOUT, InvalidHandle}; - } - - if (thread->IsPendingTermination()) { - lock.CancelSleep(); - return {ERR_THREAD_TERMINATING, InvalidHandle}; - } - - if (thread->IsSyncCancelled()) { - thread->SetSyncCancelled(false); - lock.CancelSleep(); - return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; - } - - for (auto& object : sync_objects) { - object->AddWaitingThread(SharedFrom(thread)); - } - - thread->SetSynchronizationObjects(&sync_objects); - thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); - thread->SetStatus(ThreadStatus::WaitSynch); - thread->SetWaitingSync(true); - } - thread->SetWaitingSync(false); - - if (event_handle != InvalidHandle) { - auto& time_manager = kernel.TimeManager(); - time_manager.UnscheduleTimeEvent(event_handle); - } - - { - KScopedSchedulerLock lock(kernel); - ResultCode signaling_result = thread->GetSignalingResult(); - SynchronizationObject* signaling_object = thread->GetSignalingObject(); - thread->SetSynchronizationObjects(nullptr); - auto shared_thread = SharedFrom(thread); - for (auto& obj : sync_objects) { - obj->RemoveWaitingThread(shared_thread); - } - if (signaling_object != nullptr) { - const auto itr = std::find_if( - sync_objects.begin(), sync_objects.end(), - [signaling_object](const std::shared_ptr<SynchronizationObject>& object) { - return object.get() == signaling_object; - }); - ASSERT(itr != sync_objects.end()); - signaling_object->Acquire(thread); - const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); - return {signaling_result, index}; - } - return {signaling_result, -1}; - } -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/synchronization.h b/src/core/hle/kernel/synchronization.h deleted file mode 100644 index 379f4b1d3..000000000 --- a/src/core/hle/kernel/synchronization.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <utility> -#include <vector> - -#include "core/hle/kernel/object.h" -#include "core/hle/result.h" - -namespace Core { -class System; -} // namespace Core - -namespace Kernel { - -class SynchronizationObject; - -/** - * The 'Synchronization' class is an interface for handling synchronization methods - * used by Synchronization objects and synchronization SVCs. This centralizes processing of - * such - */ -class Synchronization { -public: - explicit Synchronization(Core::System& system); - - /// Signals a synchronization object, waking up all its waiting threads - void SignalObject(SynchronizationObject& obj) const; - - /// Tries to see if waiting for any of the sync_objects is necessary, if not - /// it returns Success and the handle index of the signaled sync object. In - /// case not, the current thread will be locked and wait for nano_seconds or - /// for a synchronization object to signal. - std::pair<ResultCode, Handle> WaitFor( - std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds); - -private: - Core::System& system; -}; -} // namespace Kernel diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp deleted file mode 100644 index ba4d39157..000000000 --- a/src/core/hle/kernel/synchronization_object.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "core/core.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/object.h" -#include "core/hle/kernel/process.h" -#include "core/hle/kernel/synchronization.h" -#include "core/hle/kernel/synchronization_object.h" -#include "core/hle/kernel/thread.h" - -namespace Kernel { - -SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {} -SynchronizationObject::~SynchronizationObject() = default; - -void SynchronizationObject::Signal() { - kernel.Synchronization().SignalObject(*this); -} - -void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) { - auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread); - if (itr == waiting_threads.end()) - waiting_threads.push_back(std::move(thread)); -} - -void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { - auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread); - // If a thread passed multiple handles to the same object, - // the kernel might attempt to remove the thread from the object's - // waiting threads list multiple times. - if (itr != waiting_threads.end()) - waiting_threads.erase(itr); -} - -void SynchronizationObject::ClearWaitingThreads() { - waiting_threads.clear(); -} - -const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const { - return waiting_threads; -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h deleted file mode 100644 index 7408ed51f..000000000 --- a/src/core/hle/kernel/synchronization_object.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <atomic> -#include <memory> -#include <vector> - -#include "core/hle/kernel/object.h" - -namespace Kernel { - -class KernelCore; -class Synchronization; -class Thread; - -/// Class that represents a Kernel object that a thread can be waiting on -class SynchronizationObject : public Object { -public: - explicit SynchronizationObject(KernelCore& kernel); - ~SynchronizationObject() override; - - /** - * Check if the specified thread should wait until the object is available - * @param thread The thread about which we're deciding. - * @return True if the current thread should wait due to this object being unavailable - */ - virtual bool ShouldWait(const Thread* thread) const = 0; - - /// Acquire/lock the object for the specified thread if it is available - virtual void Acquire(Thread* thread) = 0; - - /// Signal this object - virtual void Signal(); - - virtual bool IsSignaled() const { - return is_signaled; - } - - /** - * Add a thread to wait on this object - * @param thread Pointer to thread to add - */ - void AddWaitingThread(std::shared_ptr<Thread> thread); - - /** - * Removes a thread from waiting on this object (e.g. if it was resumed already) - * @param thread Pointer to thread to remove - */ - void RemoveWaitingThread(std::shared_ptr<Thread> thread); - - /// Get a const reference to the waiting threads list for debug use - const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const; - - void ClearWaitingThreads(); - -protected: - std::atomic_bool is_signaled{}; // Tells if this sync object is signaled - -private: - /// Threads waiting for this object to become available - std::vector<std::shared_ptr<Thread>> waiting_threads; -}; - -// Specialization of DynamicObjectCast for SynchronizationObjects -template <> -inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>( - std::shared_ptr<Object> object) { - if (object != nullptr && object->IsWaitable()) { - return std::static_pointer_cast<SynchronizationObject>(object); - } - return nullptr; -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index a4f9e0d97..d97323255 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -17,9 +17,11 @@ #include "core/hardware_properties.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/memory_layout.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" @@ -34,26 +36,19 @@ namespace Kernel { -bool Thread::ShouldWait(const Thread* thread) const { - return status != ThreadStatus::Dead; -} - bool Thread::IsSignaled() const { - return status == ThreadStatus::Dead; -} - -void Thread::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); + return signaled; } -Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {} +Thread::Thread(KernelCore& kernel) : KSynchronizationObject{kernel} {} Thread::~Thread() = default; void Thread::Stop() { { KScopedSchedulerLock lock(kernel); - SetStatus(ThreadStatus::Dead); - Signal(); + SetState(ThreadState::Terminated); + signaled = true; + NotifyAvailable(); kernel.GlobalHandleTable().Close(global_handle); if (owner_process) { @@ -67,59 +62,27 @@ void Thread::Stop() { global_handle = 0; } -void Thread::ResumeFromWait() { +void Thread::Wakeup() { KScopedSchedulerLock lock(kernel); - switch (status) { - case ThreadStatus::Paused: - case ThreadStatus::WaitSynch: - case ThreadStatus::WaitHLEEvent: - case ThreadStatus::WaitSleep: - case ThreadStatus::WaitIPC: - case ThreadStatus::WaitMutex: - case ThreadStatus::WaitCondVar: - case ThreadStatus::WaitArb: - case ThreadStatus::Dormant: - break; - - case ThreadStatus::Ready: - // The thread's wakeup callback must have already been cleared when the thread was first - // awoken. - ASSERT(hle_callback == nullptr); - // If the thread is waiting on multiple wait objects, it might be awoken more than once - // before actually resuming. We can ignore subsequent wakeups if the thread status has - // already been set to ThreadStatus::Ready. - return; - case ThreadStatus::Dead: - // This should never happen, as threads must complete before being stopped. - DEBUG_ASSERT_MSG(false, "Thread with object id {} cannot be resumed because it's DEAD.", - GetObjectId()); - return; - } - - SetStatus(ThreadStatus::Ready); -} - -void Thread::OnWakeUp() { - KScopedSchedulerLock lock(kernel); - SetStatus(ThreadStatus::Ready); + SetState(ThreadState::Runnable); } ResultCode Thread::Start() { KScopedSchedulerLock lock(kernel); - SetStatus(ThreadStatus::Ready); + SetState(ThreadState::Runnable); return RESULT_SUCCESS; } void Thread::CancelWait() { KScopedSchedulerLock lock(kernel); - if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) { + if (GetState() != ThreadState::Waiting || !is_cancellable) { is_sync_cancelled = true; return; } // TODO(Blinkhawk): Implement cancel of server session is_sync_cancelled = false; SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED); - SetStatus(ThreadStatus::Ready); + SetState(ThreadState::Runnable); } static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, @@ -183,25 +146,24 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel); thread->thread_id = kernel.CreateNewThreadID(); - thread->status = ThreadStatus::Dormant; + thread->thread_state = ThreadState::Initialized; thread->entry_point = entry_point; thread->stack_top = stack_top; thread->disable_count = 1; thread->tpidr_el0 = 0; - thread->nominal_priority = thread->current_priority = priority; + thread->current_priority = priority; + thread->base_priority = priority; + thread->lock_owner = nullptr; thread->schedule_count = -1; thread->last_scheduled_tick = 0; thread->processor_id = processor_id; thread->ideal_core = processor_id; thread->affinity_mask.SetAffinity(processor_id, true); - thread->wait_objects = nullptr; - thread->mutex_wait_address = 0; - thread->condvar_wait_address = 0; - thread->wait_handle = 0; thread->name = std::move(name); thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap(); thread->owner_process = owner_process; thread->type = type_flags; + thread->signaled = false; if ((type_flags & THREADTYPE_IDLE) == 0) { auto& scheduler = kernel.GlobalSchedulerContext(); scheduler.AddThread(thread); @@ -226,153 +188,185 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); } -void Thread::SetPriority(u32 priority) { - KScopedSchedulerLock lock(kernel); +void Thread::SetBasePriority(u32 priority) { ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST, "Invalid priority value."); - nominal_priority = priority; - UpdatePriority(); + + KScopedSchedulerLock lock(kernel); + + // Change our base priority. + base_priority = priority; + + // Perform a priority restoration. + RestorePriority(kernel, this); } -void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) { +void Thread::SetSynchronizationResults(KSynchronizationObject* object, ResultCode result) { signaling_object = object; signaling_result = result; } -s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { - ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything"); - const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object); - return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1); -} - VAddr Thread::GetCommandBufferAddress() const { // Offset from the start of TLS at which the IPC command buffer begins. constexpr u64 command_header_offset = 0x80; return GetTLSAddress() + command_header_offset; } -void Thread::SetStatus(ThreadStatus new_status) { - if (new_status == status) { - return; - } +void Thread::SetState(ThreadState state) { + KScopedSchedulerLock sl(kernel); - switch (new_status) { - case ThreadStatus::Ready: - SetSchedulingStatus(ThreadSchedStatus::Runnable); - break; - case ThreadStatus::Dormant: - SetSchedulingStatus(ThreadSchedStatus::None); - break; - case ThreadStatus::Dead: - SetSchedulingStatus(ThreadSchedStatus::Exited); - break; - default: - SetSchedulingStatus(ThreadSchedStatus::Paused); - break; - } + // Clear debugging state + SetMutexWaitAddressForDebugging({}); + SetWaitReasonForDebugging({}); - status = new_status; + const ThreadState old_state = thread_state; + thread_state = + static_cast<ThreadState>((old_state & ~ThreadState::Mask) | (state & ThreadState::Mask)); + if (thread_state != old_state) { + KScheduler::OnThreadStateChanged(kernel, this, old_state); + } } -void Thread::AddMutexWaiter(std::shared_ptr<Thread> thread) { - if (thread->lock_owner.get() == this) { - // If the thread is already waiting for this thread to release the mutex, ensure that the - // waiters list is consistent and return without doing anything. - const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); - ASSERT(iter != wait_mutex_threads.end()); - return; +void Thread::AddWaiterImpl(Thread* thread) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + // Find the right spot to insert the waiter. + auto it = waiter_list.begin(); + while (it != waiter_list.end()) { + if (it->GetPriority() > thread->GetPriority()) { + break; + } + it++; } - // A thread can't wait on two different mutexes at the same time. - ASSERT(thread->lock_owner == nullptr); + // Keep track of how many kernel waiters we have. + if (Memory::IsKernelAddressKey(thread->GetAddressKey())) { + ASSERT((num_kernel_waiters++) >= 0); + } - // Ensure that the thread is not already in the list of mutex waiters - const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); - ASSERT(iter == wait_mutex_threads.end()); + // Insert the waiter. + waiter_list.insert(it, *thread); + thread->SetLockOwner(this); +} - // Keep the list in an ordered fashion - const auto insertion_point = std::find_if( - wait_mutex_threads.begin(), wait_mutex_threads.end(), - [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); }); - wait_mutex_threads.insert(insertion_point, thread); - thread->lock_owner = SharedFrom(this); +void Thread::RemoveWaiterImpl(Thread* thread) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); - UpdatePriority(); -} + // Keep track of how many kernel waiters we have. + if (Memory::IsKernelAddressKey(thread->GetAddressKey())) { + ASSERT((num_kernel_waiters--) > 0); + } -void Thread::RemoveMutexWaiter(std::shared_ptr<Thread> thread) { - ASSERT(thread->lock_owner.get() == this); + // Remove the waiter. + waiter_list.erase(waiter_list.iterator_to(*thread)); + thread->SetLockOwner(nullptr); +} - // Ensure that the thread is in the list of mutex waiters - const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); - ASSERT(iter != wait_mutex_threads.end()); +void Thread::RestorePriority(KernelCore& kernel, Thread* thread) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); - wait_mutex_threads.erase(iter); + while (true) { + // We want to inherit priority where possible. + s32 new_priority = thread->GetBasePriority(); + if (thread->HasWaiters()) { + new_priority = std::min(new_priority, thread->waiter_list.front().GetPriority()); + } - thread->lock_owner = nullptr; - UpdatePriority(); -} + // If the priority we would inherit is not different from ours, don't do anything. + if (new_priority == thread->GetPriority()) { + return; + } -void Thread::UpdatePriority() { - // If any of the threads waiting on the mutex have a higher priority - // (taking into account priority inheritance), then this thread inherits - // that thread's priority. - u32 new_priority = nominal_priority; - if (!wait_mutex_threads.empty()) { - if (wait_mutex_threads.front()->current_priority < new_priority) { - new_priority = wait_mutex_threads.front()->current_priority; + // Ensure we don't violate condition variable red black tree invariants. + if (auto* cv_tree = thread->GetConditionVariableTree(); cv_tree != nullptr) { + BeforeUpdatePriority(kernel, cv_tree, thread); } - } - if (new_priority == current_priority) { - return; - } + // Change the priority. + const s32 old_priority = thread->GetPriority(); + thread->SetPriority(new_priority); - if (GetStatus() == ThreadStatus::WaitCondVar) { - owner_process->RemoveConditionVariableThread(SharedFrom(this)); - } + // Restore the condition variable, if relevant. + if (auto* cv_tree = thread->GetConditionVariableTree(); cv_tree != nullptr) { + AfterUpdatePriority(kernel, cv_tree, thread); + } - SetCurrentPriority(new_priority); + // Update the scheduler. + KScheduler::OnThreadPriorityChanged(kernel, thread, old_priority); - if (GetStatus() == ThreadStatus::WaitCondVar) { - owner_process->InsertConditionVariableThread(SharedFrom(this)); - } + // Keep the lock owner up to date. + Thread* lock_owner = thread->GetLockOwner(); + if (lock_owner == nullptr) { + return; + } - if (!lock_owner) { - return; + // Update the thread in the lock owner's sorted list, and continue inheriting. + lock_owner->RemoveWaiterImpl(thread); + lock_owner->AddWaiterImpl(thread); + thread = lock_owner; } +} - // Ensure that the thread is within the correct location in the waiting list. - auto old_owner = lock_owner; - lock_owner->RemoveMutexWaiter(SharedFrom(this)); - old_owner->AddMutexWaiter(SharedFrom(this)); - - // Recursively update the priority of the thread that depends on the priority of this one. - lock_owner->UpdatePriority(); +void Thread::AddWaiter(Thread* thread) { + AddWaiterImpl(thread); + RestorePriority(kernel, this); } -bool Thread::AllSynchronizationObjectsReady() const { - return std::none_of(wait_objects->begin(), wait_objects->end(), - [this](const std::shared_ptr<SynchronizationObject>& object) { - return object->ShouldWait(this); - }); +void Thread::RemoveWaiter(Thread* thread) { + RemoveWaiterImpl(thread); + RestorePriority(kernel, this); } -bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) { - ASSERT(hle_callback); - return hle_callback(std::move(thread)); +Thread* Thread::RemoveWaiterByKey(s32* out_num_waiters, VAddr key) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + s32 num_waiters{}; + Thread* next_lock_owner{}; + auto it = waiter_list.begin(); + while (it != waiter_list.end()) { + if (it->GetAddressKey() == key) { + Thread* thread = std::addressof(*it); + + // Keep track of how many kernel waiters we have. + if (Memory::IsKernelAddressKey(thread->GetAddressKey())) { + ASSERT((num_kernel_waiters--) > 0); + } + it = waiter_list.erase(it); + + // Update the next lock owner. + if (next_lock_owner == nullptr) { + next_lock_owner = thread; + next_lock_owner->SetLockOwner(nullptr); + } else { + next_lock_owner->AddWaiterImpl(thread); + } + num_waiters++; + } else { + it++; + } + } + + // Do priority updates, if we have a next owner. + if (next_lock_owner) { + RestorePriority(kernel, this); + RestorePriority(kernel, next_lock_owner); + } + + // Return output. + *out_num_waiters = num_waiters; + return next_lock_owner; } ResultCode Thread::SetActivity(ThreadActivity value) { KScopedSchedulerLock lock(kernel); - auto sched_status = GetSchedulingStatus(); + auto sched_status = GetState(); - if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) { + if (sched_status != ThreadState::Runnable && sched_status != ThreadState::Waiting) { return ERR_INVALID_STATE; } - if (IsPendingTermination()) { + if (IsTerminationRequested()) { return RESULT_SUCCESS; } @@ -394,7 +388,8 @@ ResultCode Thread::Sleep(s64 nanoseconds) { Handle event_handle{}; { KScopedSchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); - SetStatus(ThreadStatus::WaitSleep); + SetState(ThreadState::Waiting); + SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Sleep); } if (event_handle != InvalidHandle) { @@ -405,34 +400,21 @@ ResultCode Thread::Sleep(s64 nanoseconds) { } void Thread::AddSchedulingFlag(ThreadSchedFlags flag) { - const u32 old_state = scheduling_state; + const auto old_state = GetRawState(); pausing_state |= static_cast<u32>(flag); - const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); - scheduling_state = base_scheduling | pausing_state; + const auto base_scheduling = GetState(); + thread_state = base_scheduling | static_cast<ThreadState>(pausing_state); KScheduler::OnThreadStateChanged(kernel, this, old_state); } void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) { - const u32 old_state = scheduling_state; + const auto old_state = GetRawState(); pausing_state &= ~static_cast<u32>(flag); - const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); - scheduling_state = base_scheduling | pausing_state; + const auto base_scheduling = GetState(); + thread_state = base_scheduling | static_cast<ThreadState>(pausing_state); KScheduler::OnThreadStateChanged(kernel, this, old_state); } -void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { - const u32 old_state = scheduling_state; - scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) | - static_cast<u32>(new_status); - KScheduler::OnThreadStateChanged(kernel, this, old_state); -} - -void Thread::SetCurrentPriority(u32 new_priority) { - const u32 old_priority = std::exchange(current_priority, new_priority); - KScheduler::OnThreadPriorityChanged(kernel, this, kernel.CurrentScheduler()->GetCurrentThread(), - old_priority); -} - ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { KScopedSchedulerLock lock(kernel); const auto HighestSetCore = [](u64 mask, u32 max_cores) { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 11ef29888..6b66c9a0e 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -6,16 +6,21 @@ #include <array> #include <functional> +#include <span> #include <string> #include <utility> #include <vector> +#include <boost/intrusive/list.hpp> + #include "common/common_types.h" +#include "common/intrusive_red_black_tree.h" #include "common/spin_lock.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/k_affinity_mask.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/synchronization_object.h" +#include "core/hle/kernel/svc_common.h" #include "core/hle/result.h" namespace Common { @@ -73,19 +78,24 @@ enum ThreadProcessorId : s32 { (1 << THREADPROCESSORID_2) | (1 << THREADPROCESSORID_3) }; -enum class ThreadStatus { - Ready, ///< Ready to run - Paused, ///< Paused by SetThreadActivity or debug - WaitHLEEvent, ///< Waiting for hle event to finish - WaitSleep, ///< Waiting due to a SleepThread SVC - WaitIPC, ///< Waiting for the reply from an IPC request - WaitSynch, ///< Waiting due to WaitSynchronization - WaitMutex, ///< Waiting due to an ArbitrateLock svc - WaitCondVar, ///< Waiting due to an WaitProcessWideKey svc - WaitArb, ///< Waiting due to a SignalToAddress/WaitForAddress svc - Dormant, ///< Created but not yet made ready - Dead ///< Run to completion, or forcefully terminated +enum class ThreadState : u16 { + Initialized = 0, + Waiting = 1, + Runnable = 2, + Terminated = 3, + + SuspendShift = 4, + Mask = (1 << SuspendShift) - 1, + + ProcessSuspended = (1 << (0 + SuspendShift)), + ThreadSuspended = (1 << (1 + SuspendShift)), + DebugSuspended = (1 << (2 + SuspendShift)), + BacktraceSuspended = (1 << (3 + SuspendShift)), + InitSuspended = (1 << (4 + SuspendShift)), + + SuspendFlagMask = ((1 << 5) - 1) << SuspendShift, }; +DECLARE_ENUM_FLAG_OPERATORS(ThreadState); enum class ThreadWakeupReason { Signal, // The thread was woken up by WakeupAllWaitingThreads due to an object signal. @@ -97,13 +107,6 @@ enum class ThreadActivity : u32 { Paused = 1, }; -enum class ThreadSchedStatus : u32 { - None = 0, - Paused = 1, - Runnable = 2, - Exited = 3, -}; - enum class ThreadSchedFlags : u32 { ProcessPauseFlag = 1 << 4, ThreadPauseFlag = 1 << 5, @@ -111,13 +114,20 @@ enum class ThreadSchedFlags : u32 { KernelInitPauseFlag = 1 << 8, }; -enum class ThreadSchedMasks : u32 { - LowMask = 0x000f, - HighMask = 0xfff0, - ForcePauseMask = 0x0070, +enum class ThreadWaitReasonForDebugging : u32 { + None, ///< Thread is not waiting + Sleep, ///< Thread is waiting due to a SleepThread SVC + IPC, ///< Thread is waiting for the reply from an IPC request + Synchronization, ///< Thread is waiting due to a WaitSynchronization SVC + ConditionVar, ///< Thread is waiting due to a WaitProcessWideKey SVC + Arbitration, ///< Thread is waiting due to a SignalToAddress/WaitForAddress SVC + Suspended, ///< Thread is waiting due to process suspension }; -class Thread final : public SynchronizationObject { +class Thread final : public KSynchronizationObject, public boost::intrusive::list_base_hook<> { + friend class KScheduler; + friend class Process; + public: explicit Thread(KernelCore& kernel); ~Thread() override; @@ -127,10 +137,6 @@ public: using ThreadContext32 = Core::ARM_Interface::ThreadContext32; using ThreadContext64 = Core::ARM_Interface::ThreadContext64; - using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; - - using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>; - /** * Creates and returns a new thread. The new thread is immediately scheduled * @param system The instance of the whole system @@ -186,59 +192,54 @@ public: return HANDLE_TYPE; } - bool ShouldWait(const Thread* thread) const override; - void Acquire(Thread* thread) override; - bool IsSignaled() const override; - /** * Gets the thread's current priority * @return The current thread's priority */ - u32 GetPriority() const { + [[nodiscard]] s32 GetPriority() const { return current_priority; } /** + * Sets the thread's current priority. + * @param priority The new priority. + */ + void SetPriority(s32 priority) { + current_priority = priority; + } + + /** * Gets the thread's nominal priority. * @return The current thread's nominal priority. */ - u32 GetNominalPriority() const { - return nominal_priority; + [[nodiscard]] s32 GetBasePriority() const { + return base_priority; } /** - * Sets the thread's current priority - * @param priority The new priority + * Sets the thread's nominal priority. + * @param priority The new priority. */ - void SetPriority(u32 priority); - - /// Adds a thread to the list of threads that are waiting for a lock held by this thread. - void AddMutexWaiter(std::shared_ptr<Thread> thread); - - /// Removes a thread from the list of threads that are waiting for a lock held by this thread. - void RemoveMutexWaiter(std::shared_ptr<Thread> thread); - - /// Recalculates the current priority taking into account priority inheritance. - void UpdatePriority(); + void SetBasePriority(u32 priority); /// Changes the core that the thread is running or scheduled to run on. - ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); + [[nodiscard]] ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); /** * Gets the thread's thread ID * @return The thread's ID */ - u64 GetThreadID() const { + [[nodiscard]] u64 GetThreadID() const { return thread_id; } /// Resumes a thread from waiting - void ResumeFromWait(); - - void OnWakeUp(); + void Wakeup(); ResultCode Start(); + virtual bool IsSignaled() const override; + /// Cancels a waiting operation that this thread may or may not be within. /// /// When the thread is within a waiting state, this will set the thread's @@ -247,29 +248,20 @@ public: /// void CancelWait(); - void SetSynchronizationResults(SynchronizationObject* object, ResultCode result); + void SetSynchronizationResults(KSynchronizationObject* object, ResultCode result); - SynchronizationObject* GetSignalingObject() const { - return signaling_object; + void SetSyncedObject(KSynchronizationObject* object, ResultCode result) { + SetSynchronizationResults(object, result); } - ResultCode GetSignalingResult() const { + ResultCode GetWaitResult(KSynchronizationObject** out) const { + *out = signaling_object; return signaling_result; } - /** - * Retrieves the index that this particular object occupies in the list of objects - * that the thread passed to WaitSynchronization, starting the search from the last element. - * - * It is used to set the output index of WaitSynchronization when the thread is awakened. - * - * When a thread wakes up due to an object signal, the kernel will use the index of the last - * matching object in the wait objects list in case of having multiple instances of the same - * object in the list. - * - * @param object Object to query the index of. - */ - s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const; + ResultCode GetSignalingResult() const { + return signaling_result; + } /** * Stops a thread, invalidating it from further use @@ -341,18 +333,22 @@ public: std::shared_ptr<Common::Fiber>& GetHostContext(); - ThreadStatus GetStatus() const { - return status; + ThreadState GetState() const { + return thread_state & ThreadState::Mask; + } + + ThreadState GetRawState() const { + return thread_state; } - void SetStatus(ThreadStatus new_status); + void SetState(ThreadState state); s64 GetLastScheduledTick() const { - return this->last_scheduled_tick; + return last_scheduled_tick; } void SetLastScheduledTick(s64 tick) { - this->last_scheduled_tick = tick; + last_scheduled_tick = tick; } u64 GetTotalCPUTimeTicks() const { @@ -387,98 +383,18 @@ public: return owner_process; } - const ThreadSynchronizationObjects& GetSynchronizationObjects() const { - return *wait_objects; - } - - void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) { - wait_objects = objects; - } - - void ClearSynchronizationObjects() { - for (const auto& waiting_object : *wait_objects) { - waiting_object->RemoveWaitingThread(SharedFrom(this)); - } - wait_objects->clear(); - } - - /// Determines whether all the objects this thread is waiting on are ready. - bool AllSynchronizationObjectsReady() const; - const MutexWaitingThreads& GetMutexWaitingThreads() const { return wait_mutex_threads; } Thread* GetLockOwner() const { - return lock_owner.get(); - } - - void SetLockOwner(std::shared_ptr<Thread> owner) { - lock_owner = std::move(owner); - } - - VAddr GetCondVarWaitAddress() const { - return condvar_wait_address; - } - - void SetCondVarWaitAddress(VAddr address) { - condvar_wait_address = address; - } - - VAddr GetMutexWaitAddress() const { - return mutex_wait_address; - } - - void SetMutexWaitAddress(VAddr address) { - mutex_wait_address = address; - } - - Handle GetWaitHandle() const { - return wait_handle; - } - - void SetWaitHandle(Handle handle) { - wait_handle = handle; - } - - VAddr GetArbiterWaitAddress() const { - return arb_wait_address; - } - - void SetArbiterWaitAddress(VAddr address) { - arb_wait_address = address; - } - - bool HasHLECallback() const { - return hle_callback != nullptr; - } - - void SetHLECallback(HLECallback callback) { - hle_callback = std::move(callback); - } - - void SetHLETimeEvent(Handle time_event) { - hle_time_event = time_event; - } - - void SetHLESyncObject(SynchronizationObject* object) { - hle_object = object; - } - - Handle GetHLETimeEvent() const { - return hle_time_event; - } - - SynchronizationObject* GetHLESyncObject() const { - return hle_object; + return lock_owner; } - void InvalidateHLECallback() { - SetHLECallback(nullptr); + void SetLockOwner(Thread* owner) { + lock_owner = owner; } - bool InvokeHLECallback(std::shared_ptr<Thread> thread); - u32 GetIdealCore() const { return ideal_core; } @@ -493,20 +409,11 @@ public: ResultCode Sleep(s64 nanoseconds); s64 GetYieldScheduleCount() const { - return this->schedule_count; + return schedule_count; } void SetYieldScheduleCount(s64 count) { - this->schedule_count = count; - } - - ThreadSchedStatus GetSchedulingStatus() const { - return static_cast<ThreadSchedStatus>(scheduling_state & - static_cast<u32>(ThreadSchedMasks::LowMask)); - } - - bool IsRunnable() const { - return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable); + schedule_count = count; } bool IsRunning() const { @@ -517,36 +424,32 @@ public: is_running = value; } - bool IsSyncCancelled() const { + bool IsWaitCancelled() const { return is_sync_cancelled; } - void SetSyncCancelled(bool value) { - is_sync_cancelled = value; + void ClearWaitCancelled() { + is_sync_cancelled = false; } Handle GetGlobalHandle() const { return global_handle; } - bool IsWaitingForArbitration() const { - return waiting_for_arbitration; + bool IsCancellable() const { + return is_cancellable; } - void WaitForArbitration(bool set) { - waiting_for_arbitration = set; + void SetCancellable() { + is_cancellable = true; } - bool IsWaitingSync() const { - return is_waiting_on_sync; + void ClearCancellable() { + is_cancellable = false; } - void SetWaitingSync(bool is_waiting) { - is_waiting_on_sync = is_waiting; - } - - bool IsPendingTermination() const { - return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited; + bool IsTerminationRequested() const { + return will_be_terminated || GetRawState() == ThreadState::Terminated; } bool IsPaused() const { @@ -578,21 +481,21 @@ public: constexpr QueueEntry() = default; constexpr void Initialize() { - this->prev = nullptr; - this->next = nullptr; + prev = nullptr; + next = nullptr; } constexpr Thread* GetPrev() const { - return this->prev; + return prev; } constexpr Thread* GetNext() const { - return this->next; + return next; } constexpr void SetPrev(Thread* thread) { - this->prev = thread; + prev = thread; } constexpr void SetNext(Thread* thread) { - this->next = thread; + next = thread; } private: @@ -601,11 +504,11 @@ public: }; QueueEntry& GetPriorityQueueEntry(s32 core) { - return this->per_core_priority_queue_entry[core]; + return per_core_priority_queue_entry[core]; } const QueueEntry& GetPriorityQueueEntry(s32 core) const { - return this->per_core_priority_queue_entry[core]; + return per_core_priority_queue_entry[core]; } s32 GetDisableDispatchCount() const { @@ -622,24 +525,170 @@ public: disable_count--; } + void SetWaitReasonForDebugging(ThreadWaitReasonForDebugging reason) { + wait_reason_for_debugging = reason; + } + + [[nodiscard]] ThreadWaitReasonForDebugging GetWaitReasonForDebugging() const { + return wait_reason_for_debugging; + } + + void SetWaitObjectsForDebugging(const std::span<KSynchronizationObject*>& objects) { + wait_objects_for_debugging.clear(); + wait_objects_for_debugging.reserve(objects.size()); + for (const auto& object : objects) { + wait_objects_for_debugging.emplace_back(object); + } + } + + [[nodiscard]] const std::vector<KSynchronizationObject*>& GetWaitObjectsForDebugging() const { + return wait_objects_for_debugging; + } + + void SetMutexWaitAddressForDebugging(VAddr address) { + mutex_wait_address_for_debugging = address; + } + + [[nodiscard]] VAddr GetMutexWaitAddressForDebugging() const { + return mutex_wait_address_for_debugging; + } + + void AddWaiter(Thread* thread); + + void RemoveWaiter(Thread* thread); + + [[nodiscard]] Thread* RemoveWaiterByKey(s32* out_num_waiters, VAddr key); + + [[nodiscard]] VAddr GetAddressKey() const { + return address_key; + } + + [[nodiscard]] u32 GetAddressKeyValue() const { + return address_key_value; + } + + void SetAddressKey(VAddr key) { + address_key = key; + } + + void SetAddressKey(VAddr key, u32 val) { + address_key = key; + address_key_value = val; + } + private: - friend class GlobalSchedulerContext; - friend class KScheduler; - friend class Process; + static constexpr size_t PriorityInheritanceCountMax = 10; + union SyncObjectBuffer { + std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> sync_objects{}; + std::array<Handle, + Svc::ArgumentHandleCountMax*(sizeof(KSynchronizationObject*) / sizeof(Handle))> + handles; + constexpr SyncObjectBuffer() {} + }; + static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles)); + + struct ConditionVariableComparator { + struct LightCompareType { + u64 cv_key{}; + s32 priority{}; + + [[nodiscard]] constexpr u64 GetConditionVariableKey() const { + return cv_key; + } + + [[nodiscard]] constexpr s32 GetPriority() const { + return priority; + } + }; + + template <typename T> + requires( + std::same_as<T, Thread> || + std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs, + const Thread& rhs) { + const uintptr_t l_key = lhs.GetConditionVariableKey(); + const uintptr_t r_key = rhs.GetConditionVariableKey(); + + if (l_key < r_key) { + // Sort first by key + return -1; + } else if (l_key == r_key && lhs.GetPriority() < rhs.GetPriority()) { + // And then by priority. + return -1; + } else { + return 1; + } + } + }; + + Common::IntrusiveRedBlackTreeNode condvar_arbiter_tree_node{}; + + using ConditionVariableThreadTreeTraits = + Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&Thread::condvar_arbiter_tree_node>; + using ConditionVariableThreadTree = + ConditionVariableThreadTreeTraits::TreeType<ConditionVariableComparator>; + +public: + using ConditionVariableThreadTreeType = ConditionVariableThreadTree; + + [[nodiscard]] uintptr_t GetConditionVariableKey() const { + return condvar_key; + } + + [[nodiscard]] uintptr_t GetAddressArbiterKey() const { + return condvar_key; + } - void SetSchedulingStatus(ThreadSchedStatus new_status); + void SetConditionVariable(ConditionVariableThreadTree* tree, VAddr address, uintptr_t cv_key, + u32 value) { + condvar_tree = tree; + condvar_key = cv_key; + address_key = address; + address_key_value = value; + } + + void ClearConditionVariable() { + condvar_tree = nullptr; + } + + [[nodiscard]] bool IsWaitingForConditionVariable() const { + return condvar_tree != nullptr; + } + + void SetAddressArbiter(ConditionVariableThreadTree* tree, uintptr_t address) { + condvar_tree = tree; + condvar_key = address; + } + + void ClearAddressArbiter() { + condvar_tree = nullptr; + } + + [[nodiscard]] bool IsWaitingForAddressArbiter() const { + return condvar_tree != nullptr; + } + + [[nodiscard]] ConditionVariableThreadTree* GetConditionVariableTree() const { + return condvar_tree; + } + + [[nodiscard]] bool HasWaiters() const { + return !waiter_list.empty(); + } + +private: void AddSchedulingFlag(ThreadSchedFlags flag); void RemoveSchedulingFlag(ThreadSchedFlags flag); - - void SetCurrentPriority(u32 new_priority); + void AddWaiterImpl(Thread* thread); + void RemoveWaiterImpl(Thread* thread); + static void RestorePriority(KernelCore& kernel, Thread* thread); Common::SpinLock context_guard{}; ThreadContext32 context_32{}; ThreadContext64 context_64{}; std::shared_ptr<Common::Fiber> host_context{}; - ThreadStatus status = ThreadStatus::Dormant; - u32 scheduling_state = 0; + ThreadState thread_state = ThreadState::Initialized; u64 thread_id = 0; @@ -652,11 +701,11 @@ private: /// Nominal thread priority, as set by the emulated application. /// The nominal priority is the thread priority without priority /// inheritance taken into account. - u32 nominal_priority = 0; + s32 base_priority{}; /// Current thread priority. This may change over the course of the /// thread's lifetime in order to facilitate priority inheritance. - u32 current_priority = 0; + s32 current_priority{}; u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. s64 schedule_count{}; @@ -671,37 +720,27 @@ private: Process* owner_process; /// Objects that the thread is waiting on, in the same order as they were - /// passed to WaitSynchronization. - ThreadSynchronizationObjects* wait_objects; + /// passed to WaitSynchronization. This is used for debugging only. + std::vector<KSynchronizationObject*> wait_objects_for_debugging; - SynchronizationObject* signaling_object; + /// The current mutex wait address. This is used for debugging only. + VAddr mutex_wait_address_for_debugging{}; + + /// The reason the thread is waiting. This is used for debugging only. + ThreadWaitReasonForDebugging wait_reason_for_debugging{}; + + KSynchronizationObject* signaling_object; ResultCode signaling_result{RESULT_SUCCESS}; /// List of threads that are waiting for a mutex that is held by this thread. MutexWaitingThreads wait_mutex_threads; /// Thread that owns the lock that this thread is waiting for. - std::shared_ptr<Thread> lock_owner; - - /// If waiting on a ConditionVariable, this is the ConditionVariable address - VAddr condvar_wait_address = 0; - /// If waiting on a Mutex, this is the mutex address - VAddr mutex_wait_address = 0; - /// The handle used to wait for the mutex. - Handle wait_handle = 0; - - /// If waiting for an AddressArbiter, this is the address being waited on. - VAddr arb_wait_address{0}; - bool waiting_for_arbitration{}; + Thread* lock_owner{}; /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. Handle global_handle = 0; - /// Callback for HLE Events - HLECallback hle_callback; - Handle hle_time_event; - SynchronizationObject* hle_object; - KScheduler* scheduler = nullptr; std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> per_core_priority_queue_entry{}; @@ -714,7 +753,7 @@ private: u32 pausing_state = 0; bool is_running = false; - bool is_waiting_on_sync = false; + bool is_cancellable = false; bool is_sync_cancelled = false; bool is_continuous_on_svc = false; @@ -725,6 +764,18 @@ private: bool was_running = false; + bool signaled{}; + + ConditionVariableThreadTree* condvar_tree{}; + uintptr_t condvar_key{}; + VAddr address_key{}; + u32 address_key_value{}; + s32 num_kernel_waiters{}; + + using WaiterList = boost::intrusive::list<Thread>; + WaiterList waiter_list{}; + WaiterList pinned_waiter_list{}; + std::string name; }; diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index 79628e2b4..832edd629 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -18,12 +18,10 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} { time_manager_event_type = Core::Timing::CreateEvent( "Kernel::TimeManagerCallback", [this](std::uintptr_t thread_handle, std::chrono::nanoseconds) { - const KScopedSchedulerLock lock(system.Kernel()); - const auto proper_handle = static_cast<Handle>(thread_handle); - std::shared_ptr<Thread> thread; { std::lock_guard lock{mutex}; + const auto proper_handle = static_cast<Handle>(thread_handle); if (cancelled_events[proper_handle]) { return; } @@ -32,7 +30,7 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} { if (thread) { // Thread can be null if process has exited - thread->OnWakeUp(); + thread->Wakeup(); } }); } @@ -42,8 +40,7 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 event_handle = timetask->GetGlobalHandle(); if (nanoseconds > 0) { ASSERT(timetask); - ASSERT(timetask->GetStatus() != ThreadStatus::Ready); - ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex); + ASSERT(timetask->GetState() != ThreadState::Runnable); system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{nanoseconds}, time_manager_event_type, event_handle); } else { diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index 298f6d520..0bff97a37 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp @@ -56,7 +56,7 @@ APM::APM(Core::System& system_, std::shared_ptr<Module> apm_, Controller& contro static const FunctionInfo functions[] = { {0, &APM::OpenSession, "OpenSession"}, {1, &APM::GetPerformanceMode, "GetPerformanceMode"}, - {6, nullptr, "IsCpuOverclockEnabled"}, + {6, &APM::IsCpuOverclockEnabled, "IsCpuOverclockEnabled"}, }; RegisterHandlers(functions); } @@ -78,6 +78,14 @@ void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) { rb.PushEnum(controller.GetCurrentPerformanceMode()); } +void APM::IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_APM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(false); +} + APM_Sys::APM_Sys(Core::System& system_, Controller& controller_) : ServiceFramework{system_, "apm:sys"}, controller{controller_} { // clang-format off diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h index 7d57c4978..063ad5308 100644 --- a/src/core/hle/service/apm/interface.h +++ b/src/core/hle/service/apm/interface.h @@ -20,6 +20,7 @@ public: private: void OpenSession(Kernel::HLERequestContext& ctx); void GetPerformanceMode(Kernel::HLERequestContext& ctx); + void IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx); std::shared_ptr<Module> apm; Controller& controller; diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp index 5557da72e..641bcadea 100644 --- a/src/core/hle/service/nfp/nfp.cpp +++ b/src/core/hle/service/nfp/nfp.cpp @@ -190,12 +190,6 @@ private: void GetDeviceState(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_NFP, "called"); - auto nfc_event = nfp_interface.GetNFCEvent(); - if (!nfc_event->ShouldWait(&ctx.GetThread()) && !has_attached_handle) { - device_state = DeviceState::TagFound; - nfc_event->Clear(); - } - IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); rb.Push<u32>(static_cast<u32>(device_state)); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index d8735491c..36970f828 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -11,8 +11,9 @@ namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) - : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 79b8b6de1..77ef53cdd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices { class nvhost_nvdec final : public nvhost_nvdec_common { public: - explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_nvdec() override; NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b49cecb42..4898dc27a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -11,6 +11,7 @@ #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" #include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/memory.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" @@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s } } // Anonymous namespace -nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) - : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {} nvhost_nvdec_common::~nvhost_nvdec_common() = default; NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { @@ -71,10 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); offset = SpliceVectors(input, fences, params.fence_count, offset); - // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment - auto& gpu = system.GPU(); - + if (gpu.UseNvdec()) { + for (std::size_t i = 0; i < syncpt_increments.size(); i++) { + const SyncptIncr& syncpt_incr = syncpt_increments[i]; + fences[i].id = syncpt_incr.id; + fences[i].value = + syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments); + } + } for (const auto& cmd_buffer : command_buffers) { auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); @@ -89,7 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u cmdlist.size() * sizeof(u32)); gpu.PushCommandBuffer(cmdlist); } + if (gpu.UseNvdec()) { + fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1); + + Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}}; + gpu.PushCommandBuffer(cmdlist); + } std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); // Some games expect command_buffers to be written back offset = sizeof(IoctlSubmit); @@ -98,6 +111,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u offset = WriteVectors(output, reloc_shifts, offset); offset = WriteVectors(output, syncpt_increments, offset); offset = WriteVectors(output, wait_checks, offset); + offset = WriteVectors(output, fences, offset); return NvResult::Success; } @@ -107,9 +121,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); - // We found that implementing this causes deadlocks with async gpu, along with degraded - // performance. TODO: RE the nvdec async implementation - params.value = 0; + if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) { + device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); + } + params.value = device_syncpoints[params.param]; std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index d9f95ba58..4c9d4ba41 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -10,12 +10,16 @@ #include "common/swap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" -namespace Service::Nvidia::Devices { +namespace Service::Nvidia { +class SyncpointManager; + +namespace Devices { class nvmap; class nvhost_nvdec_common : public nvdevice { public: - explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_nvdec_common() override; protected: @@ -157,8 +161,10 @@ protected: s32_le nvmap_fd{}; u32_le submit_timeout{}; std::shared_ptr<nvmap> nvmap_dev; - + SyncpointManager& syncpoint_manager; + std::array<u32, MaxSyncPoints> device_syncpoints{}; // This is expected to be ordered, therefore we must use a map, not unordered_map std::map<GPUVAddr, BufferMap> buffer_mappings; }; -}; // namespace Service::Nvidia::Devices +}; // namespace Devices +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 805fe86ae..72499654c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -10,8 +10,9 @@ #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) - : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} nvhost_vic::~nvhost_vic() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index b2e11f4d4..f401c61fa 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -7,11 +7,11 @@ #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { -class nvmap; class nvhost_vic final : public nvhost_nvdec_common { public: - explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_vic(); NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index e03195afe..620c18728 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); - devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); + devices["/dev/nvhost-nvdec"] = + std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager); devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); - devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); + devices["/dev/nvhost-vic"] = + std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager); } Module::~Module() = default; diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index c8c6a4d64..5578181a4 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -26,10 +26,10 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) LOG_WARNING(Service, "Adding graphics buffer {}", slot); { - std::unique_lock lock{queue_mutex}; + std::unique_lock lock{free_buffers_mutex}; free_buffers.push_back(slot); } - condition.notify_one(); + free_buffers_condition.notify_one(); buffers[slot] = { .slot = slot, @@ -48,8 +48,8 @@ std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::Dequeue u32 height) { // Wait for first request before trying to dequeue { - std::unique_lock lock{queue_mutex}; - condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; }); + std::unique_lock lock{free_buffers_mutex}; + free_buffers_condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; }); } if (!is_connect) { @@ -58,7 +58,7 @@ std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::Dequeue return std::nullopt; } - std::unique_lock lock{queue_mutex}; + std::unique_lock lock{free_buffers_mutex}; auto f_itr = free_buffers.begin(); auto slot = buffers.size(); @@ -100,6 +100,7 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, buffers[slot].crop_rect = crop_rect; buffers[slot].swap_interval = swap_interval; buffers[slot].multi_fence = multi_fence; + std::unique_lock lock{queue_sequence_mutex}; queue_sequence.push_back(slot); } @@ -113,15 +114,16 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult buffers[slot].swap_interval = 0; { - std::unique_lock lock{queue_mutex}; + std::unique_lock lock{free_buffers_mutex}; free_buffers.push_back(slot); } - condition.notify_one(); + free_buffers_condition.notify_one(); buffer_wait_event.writable->Signal(); } std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { + std::unique_lock lock{queue_sequence_mutex}; std::size_t buffer_slot = buffers.size(); // Iterate to find a queued buffer matching the requested slot. while (buffer_slot == buffers.size() && !queue_sequence.empty()) { @@ -147,27 +149,29 @@ void BufferQueue::ReleaseBuffer(u32 slot) { buffers[slot].status = Buffer::Status::Free; { - std::unique_lock lock{queue_mutex}; + std::unique_lock lock{free_buffers_mutex}; free_buffers.push_back(slot); } - condition.notify_one(); + free_buffers_condition.notify_one(); buffer_wait_event.writable->Signal(); } void BufferQueue::Connect() { + std::unique_lock lock{queue_sequence_mutex}; queue_sequence.clear(); - id = 1; - layer_id = 1; is_connect = true; } void BufferQueue::Disconnect() { buffers.fill({}); - queue_sequence.clear(); + { + std::unique_lock lock{queue_sequence_mutex}; + queue_sequence.clear(); + } buffer_wait_event.writable->Signal(); is_connect = false; - condition.notify_one(); + free_buffers_condition.notify_one(); } u32 BufferQueue::Query(QueryType type) { @@ -176,9 +180,11 @@ u32 BufferQueue::Query(QueryType type) { switch (type) { case QueryType::NativeWindowFormat: return static_cast<u32>(PixelFormat::RGBA8888); + case QueryType::NativeWindowWidth: + case QueryType::NativeWindowHeight: + break; } - - UNIMPLEMENTED(); + UNIMPLEMENTED_MSG("Unimplemented query type={}", type); return 0; } diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index a2f60d9eb..ad7469277 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -129,8 +129,10 @@ private: std::list<u32> queue_sequence; Kernel::EventPair buffer_wait_event; - std::mutex queue_mutex; - std::condition_variable condition; + std::mutex free_buffers_mutex; + std::condition_variable free_buffers_condition; + + std::mutex queue_sequence_mutex; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 4b3581949..ceaa93d28 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -38,6 +38,10 @@ void NVFlinger::SplitVSync() { system.RegisterHostThread(); std::string name = "yuzu:VSyncThread"; MicroProfileOnThreadCreate(name.c_str()); + + // Cleanup + SCOPE_EXIT({ MicroProfileOnThreadExit(); }); + Common::SetCurrentThreadName(name.c_str()); Common::SetCurrentThreadPriority(Common::ThreadPriority::High); s64 delay = 0; diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index 4da69f503..2b91a89d1 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp @@ -139,9 +139,6 @@ void SM::GetService(Kernel::HLERequestContext& ctx) { server_port->AppendPendingSession(server); } - // Wake the threads waiting on the ServerPort - server_port->Signal(); - LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId()); IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp index c822d21b8..ca61d72ca 100644 --- a/src/core/hle/service/sockets/sockets_translate.cpp +++ b/src/core/hle/service/sockets/sockets_translate.cpp @@ -64,6 +64,7 @@ Network::Type Translate(Type type) { return Network::Type::DGRAM; default: UNIMPLEMENTED_MSG("Unimplemented type={}", type); + return Network::Type{}; } } diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index d91c15561..e4f5fd40c 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -185,6 +185,10 @@ constexpr std::array<const char*, 66> RESULT_MESSAGES{ "The INI file contains more than the maximum allowable number of KIP files.", }; +std::string GetResultStatusString(ResultStatus status) { + return RESULT_MESSAGES.at(static_cast<std::size_t>(status)); +} + std::ostream& operator<<(std::ostream& os, ResultStatus status) { os << RESULT_MESSAGES.at(static_cast<std::size_t>(status)); return os; diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 36e79e71d..b2e5b13de 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -135,6 +135,7 @@ enum class ResultStatus : u16 { ErrorINITooManyKIPs, }; +std::string GetResultStatusString(ResultStatus status); std::ostream& operator<<(std::ostream& os, ResultStatus status); /// Interface for loading an application diff --git a/src/core/settings.h b/src/core/settings.h index 1cb7ff7f5..a324530bd 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -131,6 +131,7 @@ struct Values { bool cpuopt_unsafe_unfuse_fma; bool cpuopt_unsafe_reduce_fp_error; + bool cpuopt_unsafe_inaccurate_nan; // Renderer Setting<RendererBackend> renderer_backend; diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp index c883c4d56..54def22da 100644 --- a/src/tests/common/ring_buffer.cpp +++ b/src/tests/common/ring_buffer.cpp @@ -20,60 +20,60 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") { for (std::size_t i = 0; i < 4; i++) { const char elem = static_cast<char>(i); const std::size_t count = buf.Push(&elem, 1); - REQUIRE(count == 1); + REQUIRE(count == 1U); } - REQUIRE(buf.Size() == 4); + REQUIRE(buf.Size() == 4U); // Pushing values into a full ring buffer should fail. { const char elem = static_cast<char>(42); const std::size_t count = buf.Push(&elem, 1); - REQUIRE(count == 0); + REQUIRE(count == 0U); } - REQUIRE(buf.Size() == 4); + REQUIRE(buf.Size() == 4U); // Popping multiple values from a ring buffer with values should succeed. { const std::vector<char> popped = buf.Pop(2); - REQUIRE(popped.size() == 2); + REQUIRE(popped.size() == 2U); REQUIRE(popped[0] == 0); REQUIRE(popped[1] == 1); } - REQUIRE(buf.Size() == 2); + REQUIRE(buf.Size() == 2U); // Popping a single value from a ring buffer with values should succeed. { const std::vector<char> popped = buf.Pop(1); - REQUIRE(popped.size() == 1); + REQUIRE(popped.size() == 1U); REQUIRE(popped[0] == 2); } - REQUIRE(buf.Size() == 1); + REQUIRE(buf.Size() == 1U); // Pushing more values than space available should partially suceed. { std::vector<char> to_push(6); std::iota(to_push.begin(), to_push.end(), 88); const std::size_t count = buf.Push(to_push); - REQUIRE(count == 3); + REQUIRE(count == 3U); } - REQUIRE(buf.Size() == 4); + REQUIRE(buf.Size() == 4U); // Doing an unlimited pop should pop all values. { const std::vector<char> popped = buf.Pop(); - REQUIRE(popped.size() == 4); + REQUIRE(popped.size() == 4U); REQUIRE(popped[0] == 3); REQUIRE(popped[1] == 88); REQUIRE(popped[2] == 89); REQUIRE(popped[3] == 90); } - REQUIRE(buf.Size() == 0); + REQUIRE(buf.Size() == 0U); } TEST_CASE("RingBuffer: Threaded Test", "[common]") { @@ -93,7 +93,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") { std::size_t i = 0; while (i < count) { if (const std::size_t c = buf.Push(&value[0], 1); c > 0) { - REQUIRE(c == 1); + REQUIRE(c == 1U); i++; next_value(value); } else { @@ -108,7 +108,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") { std::size_t i = 0; while (i < count) { if (const std::vector<char> v = buf.Pop(1); v.size() > 0) { - REQUIRE(v.size() == 2); + REQUIRE(v.size() == 2U); REQUIRE(v[0] == value[0]); REQUIRE(v[1] == value[1]); i++; @@ -123,7 +123,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") { producer.join(); consumer.join(); - REQUIRE(buf.Size() == 0); + REQUIRE(buf.Size() == 0U); printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e050f9aed..f7b9d7f86 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -115,8 +115,6 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h - renderer_vulkan/nsight_aftermath_tracker.cpp - renderer_vulkan/nsight_aftermath_tracker.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -131,8 +129,6 @@ add_library(video_core STATIC renderer_vulkan/vk_compute_pipeline.h renderer_vulkan/vk_descriptor_pool.cpp renderer_vulkan/vk_descriptor_pool.h - renderer_vulkan/vk_device.cpp - renderer_vulkan/vk_device.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h renderer_vulkan/vk_graphics_pipeline.cpp @@ -167,8 +163,6 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h - renderer_vulkan/wrapper.cpp - renderer_vulkan/wrapper.h shader_cache.h shader_notify.cpp shader_notify.h @@ -257,6 +251,20 @@ add_library(video_core STATIC textures/texture.h video_core.cpp video_core.h + vulkan_common/vulkan_debug_callback.cpp + vulkan_common/vulkan_debug_callback.h + vulkan_common/vulkan_device.cpp + vulkan_common/vulkan_device.h + vulkan_common/vulkan_instance.cpp + vulkan_common/vulkan_instance.h + vulkan_common/vulkan_library.cpp + vulkan_common/vulkan_library.h + vulkan_common/vulkan_surface.cpp + vulkan_common/vulkan_surface.h + vulkan_common/vulkan_wrapper.cpp + vulkan_common/vulkan_wrapper.h + vulkan_common/nsight_aftermath_tracker.cpp + vulkan_common/nsight_aftermath_tracker.h ) create_target_directory_groups(video_core) diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index e3e7432f7..94679d5d1 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_) : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), host1x_processor(std::make_unique<Host1x>(gpu)), - nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), - vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {} + sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {} CDmaPusher::~CDmaPusher() = default; @@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast<u32>(data & 0xFF); const auto cond = static_cast<u32>((data >> 8) & 0xFF); if (cond == 0) { - nvdec_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); - nvdec_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id)); } break; } @@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast<u32>(data & 0xFF); const auto cond = static_cast<u32>((data >> 8) & 0xFF); if (cond == 0) { - vic_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); - vic_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id)); } break; } diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 0db1cd646..8ca70b6dd 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -116,12 +116,10 @@ private: void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); GPU& gpu; - - std::shared_ptr<Nvdec> nvdec_processor; - std::unique_ptr<Vic> vic_processor; - std::unique_ptr<Host1x> host1x_processor; - std::unique_ptr<SyncptIncrManager> nvdec_sync; - std::unique_ptr<SyncptIncrManager> vic_sync; + std::shared_ptr<Tegra::Nvdec> nvdec_processor; + std::unique_ptr<Tegra::Vic> vic_processor; + std::unique_ptr<Tegra::Host1x> host1x_processor; + std::unique_ptr<SyncptIncrManager> sync_manager; ChClassId current_class{}; ThiRegisters vic_thi_state{}; ThiRegisters nvdec_thi_state{}; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index c4dd4881a..b12494528 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} Tegra::Host1x::~Host1x() = default; -void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32); - std::memcpy(state_offset, &arguments, sizeof(u32)); -} - -void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) { - StateWrite(static_cast<u32>(method), arguments[0]); +void Tegra::Host1x::ProcessMethod(Method method, u32 argument) { switch (method) { - case Method::WaitSyncpt: - Execute(arguments[0]); - break; case Method::LoadSyncptPayload32: - syncpoint_value = arguments[0]; + syncpoint_value = argument; break; + case Method::WaitSyncpt: case Method::WaitSyncpt32: - Execute(arguments[0]); + Execute(argument); break; default: UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); @@ -34,6 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen } void Tegra::Host1x::Execute(u32 data) { - // This method waits on a valid syncpoint. - // TODO: Implement when proper Async is in place + gpu.WaitFence(data, syncpoint_value); } diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 013eaa0c1..7e94799dd 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h @@ -14,64 +14,23 @@ class Nvdec; class Host1x { public: - struct Host1xClassRegisters { - u32 incr_syncpt{}; - u32 incr_syncpt_ctrl{}; - u32 incr_syncpt_error{}; - INSERT_PADDING_WORDS(5); - u32 wait_syncpt{}; - u32 wait_syncpt_base{}; - u32 wait_syncpt_incr{}; - u32 load_syncpt_base{}; - u32 incr_syncpt_base{}; - u32 clear{}; - u32 wait{}; - u32 wait_with_interrupt{}; - u32 delay_use{}; - u32 tick_count_high{}; - u32 tick_count_low{}; - u32 tick_ctrl{}; - INSERT_PADDING_WORDS(23); - u32 ind_ctrl{}; - u32 ind_off2{}; - u32 ind_off{}; - std::array<u32, 31> ind_data{}; - INSERT_PADDING_WORDS(1); - u32 load_syncpoint_payload32{}; - u32 stall_ctrl{}; - u32 wait_syncpt32{}; - u32 wait_syncpt_base32{}; - u32 load_syncpt_base32{}; - u32 incr_syncpt_base32{}; - u32 stall_count_high{}; - u32 stall_count_low{}; - u32 xref_ctrl{}; - u32 channel_xref_high{}; - u32 channel_xref_low{}; - }; - static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); - enum class Method : u32 { - WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, - LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, - WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, + WaitSyncpt = 0x8, + LoadSyncptPayload32 = 0x4e, + WaitSyncpt32 = 0x50, }; explicit Host1x(GPU& gpu); ~Host1x(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, const std::vector<u32>& arguments); + void ProcessMethod(Method method, u32 argument); private: /// For Host1x, execute is waiting on a syncpoint previously written into the state void Execute(u32 data); - /// Write argument into the provided offset - void StateWrite(u32 offset, u32 arguments); - u32 syncpoint_value{}; - Host1xClassRegisters state{}; GPU& gpu; }; diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index 1619d8664..acf2668dc 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -10,9 +10,7 @@ #include "video_core/surface.h" namespace VideoCore::Surface { - namespace { - using Table = std::array<std::array<u64, 2>, MaxPixelFormat>; // Compatibility table taken from Table 3.X.2 in: @@ -233,10 +231,13 @@ constexpr Table MakeCopyTable() { EnableRange(copy, COPY_CLASS_64_BITS); return copy; } - } // Anonymous namespace -bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) { +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) { + if (broken_views) { + // If format views are broken, only accept formats that are identical. + return format_a == format_b; + } static constexpr Table TABLE = MakeViewTable(); return IsSupported(TABLE, format_a, format_b); } diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index b5eb03bea..9a0522988 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -8,7 +8,7 @@ namespace VideoCore::Surface { -bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b); +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views); bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b24179d59..81b71edfb 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -208,6 +208,7 @@ Device::Device() const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; + const bool is_intel = vendor == "Intel"; bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -231,6 +232,7 @@ Device::Device() has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); + has_broken_texture_view_formats = is_amd || is_intel; has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); @@ -248,6 +250,8 @@ Device::Device() LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); + LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", + has_broken_texture_view_formats); if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 13e66846c..3e79d1e37 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -96,6 +96,10 @@ public: return has_precise_bug; } + bool HasBrokenTextureViewFormats() const { + return has_broken_texture_view_formats; + } + bool HasFastBufferSubData() const { return has_fast_buffer_sub_data; } @@ -137,6 +141,7 @@ private: bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; + bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool has_debugging_tool_attached{}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 4c690418c..546cb6d00 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -61,100 +61,99 @@ struct FormatTuple { GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; - GLenum store_format = internal_format; }; constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; @@ -431,6 +430,8 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& format_properties[i].emplace(format, properties); } } + has_broken_texture_view_formats = device.HasBrokenTextureViewFormats(); + null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); @@ -651,13 +652,11 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, if (IsConverted(runtime.device, info.format, info.type)) { flags |= ImageFlagBits::Converted; gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; - gl_store_format = GL_RGBA8; gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { const auto& tuple = GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; - gl_store_format = tuple.store_format; gl_format = tuple.format; gl_type = tuple.type; } @@ -677,23 +676,23 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, } switch (target) { case GL_TEXTURE_1D_ARRAY: - glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); + glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); break; case GL_TEXTURE_2D_ARRAY: - glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); break; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { // TODO: Where should 'fixedsamplelocations' come from? const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); - glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, + glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, height >> samples_y, num_layers, GL_FALSE); break; } case GL_TEXTURE_RECTANGLE: - glTextureStorage2D(handle, num_levels, gl_store_format, width, height); + glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); break; case GL_TEXTURE_3D: - glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: buffer.Create(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 04193e31e..15b7c3676 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -96,6 +96,10 @@ public: FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; + bool HasBrokenTextureViewFormats() const noexcept { + return has_broken_texture_view_formats; + } + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); @@ -120,6 +124,7 @@ private: UtilShaders util_shaders; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; + bool has_broken_texture_view_formats = false; StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; @@ -165,7 +170,6 @@ private: OGLTextureView store_view; OGLBuffer buffer; GLenum gl_internal_format = GL_NONE; - GLenum gl_store_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; }; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 87c8e5693..1f6a169ae 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -11,14 +11,14 @@ #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -225,7 +225,7 @@ constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages( }; } -void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, +void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, VkSampler sampler, VkImageView image_view) { const VkDescriptorImageInfo image_info{ .sampler = sampler, @@ -247,7 +247,7 @@ void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descr device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); } -void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, +void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, VkSampler sampler, VkImageView image_view_0, VkImageView image_view_1) { const VkDescriptorImageInfo image_info_0{ @@ -326,7 +326,7 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, } // Anonymous namespace -BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, +BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 2c2790bf9..43fd3d737 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -8,19 +8,18 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/texture_cache/types.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { using VideoCommon::Offset2D; -class VKDevice; -class VKScheduler; -class StateTracker; - +class Device; class Framebuffer; class ImageView; +class StateTracker; +class VKScheduler; struct BlitImagePipelineKey { constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; @@ -31,7 +30,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: - explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, + explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); ~BlitImageHelper(); @@ -67,7 +66,7 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); - const VKDevice& device; + const Device& device; VKScheduler& scheduler; StateTracker& state_tracker; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 67dd10500..5be6dabd9 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -76,7 +76,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; } - for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; @@ -85,6 +85,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast<u32>(input.type.Value())); attribute.size.Assign(static_cast<u32>(input.size.Value())); + attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); } for (std::size_t index = 0; index < std::size(attachments); ++index) { @@ -172,14 +173,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); cull_face.Assign(PackCullFace(regs.cull_face)); cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); - - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const auto& input = regs.vertex_array[index]; - VertexBinding& binding = vertex_bindings[index]; - binding.raw = 0; - binding.enabled.Assign(input.IsEnabled() ? 1 : 0); - binding.stride.Assign(static_cast<u16>(input.stride.Value())); - } + std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) { + return static_cast<u16>(array.stride.Value()); + }); } std::size_t FixedPipelineState::Hash() const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 7e95e6fce..465a55fdb 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -96,6 +96,8 @@ struct FixedPipelineState { BitField<6, 14, u32> offset; BitField<20, 3, u32> type; BitField<23, 6, u32> size; + // Not really an element of a vertex attribute, but it can be packed here + BitField<29, 1, u32> binding_index_enabled; constexpr Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); @@ -130,12 +132,6 @@ struct FixedPipelineState { } }; - union VertexBinding { - u16 raw; - BitField<0, 12, u16> stride; - BitField<12, 1, u16> enabled; - }; - struct DynamicState { union { u32 raw1; @@ -153,7 +149,8 @@ struct FixedPipelineState { BitField<0, 2, u32> cull_face; BitField<2, 1, u32> cull_enable; }; - std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; + // Vertex stride is a 12 bits value, we have 4 bits to spare per element + std::array<u16, Maxwell::NumVertexArrays> vertex_strides; void Fill(const Maxwell& regs); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 4c988429f..ca7c2c579 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -9,9 +9,9 @@ #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::MaxwellToVK { @@ -47,7 +47,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter return {}; } -VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, +VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: @@ -222,7 +222,7 @@ constexpr bool IsZetaFormat(PixelFormat pixel_format) { } // Anonymous namespace -FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) { +FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format) { ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; @@ -280,7 +280,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { return {}; } -VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, +VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: @@ -526,7 +526,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { return {}; } -VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { +VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { switch (index_format) { case Maxwell::IndexFormat::UnsignedByte: if (!device.IsExtIndexTypeUint8Supported()) { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 1a90f192e..537969840 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -6,10 +6,10 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::MaxwellToVK { @@ -22,7 +22,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter); VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); -VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, +VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter); VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); @@ -35,17 +35,17 @@ struct FormatInfo { bool storage; }; -FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); +FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format); VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); -VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology); +VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); -VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); +VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7f521cb9b..d7437e185 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -12,8 +12,6 @@ #include <fmt/format.h> -#include "common/dynamic_library.h" -#include "common/file_util.h" #include "common/logging/log.h" #include "common/telemetry.h" #include "core/core.h" @@ -24,182 +22,27 @@ #include "video_core/gpu.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/wrapper.h" - -// Include these late to avoid polluting previous headers -#ifdef _WIN32 -#include <windows.h> -// ensure include order -#include <vulkan/vulkan_win32.h> -#endif - -#if !defined(_WIN32) && !defined(__APPLE__) -#include <X11/Xlib.h> -#include <vulkan/vulkan_wayland.h> -#include <vulkan/vulkan_xlib.h> -#endif +#include "video_core/vulkan_common/vulkan_debug_callback.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_library.h" +#include "video_core/vulkan_common/vulkan_surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - namespace { - -using Core::Frontend::WindowSystemType; - -VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, - VkDebugUtilsMessageTypeFlagsEXT type, - const VkDebugUtilsMessengerCallbackDataEXT* data, - [[maybe_unused]] void* user_data) { - const char* const message{data->pMessage}; - - if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { - LOG_CRITICAL(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { - LOG_WARNING(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { - LOG_INFO(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { - LOG_DEBUG(Render_Vulkan, "{}", message); - } - return VK_FALSE; -} - -Common::DynamicLibrary OpenVulkanLibrary() { - Common::DynamicLibrary library; -#ifdef __APPLE__ - // Check if a path to a specific Vulkan library has been specified. - char* libvulkan_env = getenv("LIBVULKAN_PATH"); - if (!libvulkan_env || !library.Open(libvulkan_env)) { - // Use the libvulkan.dylib from the application bundle. - const std::string filename = - Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; - library.Open(filename.c_str()); - } -#else - std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); - if (!library.Open(filename.c_str())) { - // Android devices may not have libvulkan.so.1, only libvulkan.so. - filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); - (void)library.Open(filename.c_str()); - } -#endif - return library; -} - -std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library, - vk::InstanceDispatch& dld, WindowSystemType window_type, - bool enable_debug_utils, bool enable_layers) { - if (!library.IsOpen()) { - LOG_ERROR(Render_Vulkan, "Vulkan library not available"); - return {}; - } - if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { - LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); - return {}; - } - if (!vk::Load(dld)) { - LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); - return {}; - } - - std::vector<const char*> extensions; - extensions.reserve(6); - switch (window_type) { - case Core::Frontend::WindowSystemType::Headless: - break; -#ifdef _WIN32 - case Core::Frontend::WindowSystemType::Windows: - extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); - break; -#endif -#if !defined(_WIN32) && !defined(__APPLE__) - case Core::Frontend::WindowSystemType::X11: - extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); - break; - case Core::Frontend::WindowSystemType::Wayland: - extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); - break; -#endif - default: - LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); - break; - } - if (window_type != Core::Frontend::WindowSystemType::Headless) { - extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); - } - if (enable_debug_utils) { - extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); - - const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); - if (!properties) { - LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); - return {}; - } - - for (const char* extension : extensions) { - const auto it = - std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) { - return !std::strcmp(extension, prop.extensionName); - }); - if (it == properties->end()) { - LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); - return {}; - } - } - - std::vector<const char*> layers; - layers.reserve(1); - if (enable_layers) { - layers.push_back("VK_LAYER_KHRONOS_validation"); - } - - const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); - if (!layer_properties) { - LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); - layers.clear(); - } - - for (auto layer_it = layers.begin(); layer_it != layers.end();) { - const char* const layer = *layer_it; - const auto it = std::find_if( - layer_properties->begin(), layer_properties->end(), - [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); - if (it == layer_properties->end()) { - LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); - layer_it = layers.erase(layer_it); - } else { - ++layer_it; - } - } - - // Limit the maximum version of Vulkan to avoid using untested version. - const u32 version = std::min(vk::AvailableVersion(dld), static_cast<u32>(VK_API_VERSION_1_1)); - - vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); - if (!instance) { - LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); - return {}; - } - if (!vk::Load(*instance, dld)) { - LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); - } - return std::make_pair(std::move(instance), version); -} - std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); } -std::string GetDriverVersion(const VKDevice& device) { +std::string GetDriverVersion(const Device& device) { // Extracted from // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 const u32 version = device.GetDriverVersion(); @@ -216,7 +59,6 @@ std::string GetDriverVersion(const VKDevice& device) { const u32 minor = version & 0x3fff; return fmt::format("{}.{}", major, minor); } - return GetReadableVersion(version); } @@ -255,7 +97,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; @@ -284,14 +125,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.OnFrameDisplayed(); } -bool RendererVulkan::Init() { - library = OpenVulkanLibrary(); - std::tie(instance, instance_version) = CreateInstance( - library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); - if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { - return false; +bool RendererVulkan::Init() try { + library = OpenLibrary(); + instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, + true, Settings::values.renderer_debug); + if (Settings::values.renderer_debug) { + debug_callback = CreateDebugCallback(instance); } + surface = CreateSurface(instance, render_window); + InitializeDevice(); Report(); memory_manager = std::make_unique<VKMemoryManager>(*device); @@ -311,8 +154,11 @@ bool RendererVulkan::Init() { blit_screen = std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, *memory_manager, *swapchain, *scheduler, screen_info); - return true; + +} catch (const vk::Exception& exception) { + LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); + return false; } void RendererVulkan::ShutDown() { @@ -322,7 +168,6 @@ void RendererVulkan::ShutDown() { if (const auto& dev = device->GetLogical()) { dev.WaitIdle(); } - rasterizer.reset(); blit_screen.reset(); scheduler.reset(); @@ -331,95 +176,15 @@ void RendererVulkan::ShutDown() { device.reset(); } -bool RendererVulkan::CreateDebugCallback() { - if (!Settings::values.renderer_debug) { - return true; - } - debug_callback = instance.TryCreateDebugCallback(DebugCallback); - if (!debug_callback) { - LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); - return false; - } - return true; -} - -bool RendererVulkan::CreateSurface() { - [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); - VkSurfaceKHR unsafe_surface = nullptr; - -#ifdef _WIN32 - if (window_info.type == Core::Frontend::WindowSystemType::Windows) { - const HWND hWnd = static_cast<HWND>(window_info.render_surface); - const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, - nullptr, 0, nullptr, hWnd}; - const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( - dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); - if (!vkCreateWin32SurfaceKHR || - vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); - return false; - } - } -#endif -#if !defined(_WIN32) && !defined(__APPLE__) - if (window_info.type == Core::Frontend::WindowSystemType::X11) { - const VkXlibSurfaceCreateInfoKHR xlib_ci{ - VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, - static_cast<Display*>(window_info.display_connection), - reinterpret_cast<Window>(window_info.render_surface)}; - const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( - dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); - if (!vkCreateXlibSurfaceKHR || - vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); - return false; - } - } - if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { - const VkWaylandSurfaceCreateInfoKHR wayland_ci{ - VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, - static_cast<wl_display*>(window_info.display_connection), - static_cast<wl_surface*>(window_info.render_surface)}; - const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( - dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); - if (!vkCreateWaylandSurfaceKHR || - vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != - VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); - return false; - } - } -#endif - if (!unsafe_surface) { - LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); - return false; - } - - surface = vk::SurfaceKHR(unsafe_surface, *instance, dld); - return true; -} - -bool RendererVulkan::PickDevices() { - const auto devices = instance.EnumeratePhysicalDevices(); - if (!devices) { - LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices"); - return false; - } - +void RendererVulkan::InitializeDevice() { + const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); const s32 device_index = Settings::values.vulkan_device.GetValue(); - if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { + if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); - return false; - } - const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)], - dld); - if (!VKDevice::IsSuitable(physical_device, *surface)) { - return false; + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } - - device = - std::make_unique<VKDevice>(*instance, instance_version, physical_device, *surface, dld); - return device->Create(); + const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld); + device = std::make_unique<Device>(*instance, physical_device, *surface, dld); } void RendererVulkan::Report() const { @@ -444,26 +209,21 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -std::vector<std::string> RendererVulkan::EnumerateDevices() { +std::vector<std::string> RendererVulkan::EnumerateDevices() try { vk::InstanceDispatch dld; - Common::DynamicLibrary library = OpenVulkanLibrary(); - vk::Instance instance = - CreateInstance(library, dld, WindowSystemType::Headless, false, false).first; - if (!instance) { - return {}; - } - - const std::optional physical_devices = instance.EnumeratePhysicalDevices(); - if (!physical_devices) { - return {}; - } - + const Common::DynamicLibrary library = OpenLibrary(); + const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); + const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); std::vector<std::string> names; - names.reserve(physical_devices->size()); - for (const auto& device : *physical_devices) { + names.reserve(physical_devices.size()); + for (const VkPhysicalDevice device : physical_devices) { names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); } return names; + +} catch (const vk::Exception& exception) { + LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); + return {}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 74642fba4..5575ffc54 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -11,7 +11,7 @@ #include "common/dynamic_library.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class TelemetrySession; @@ -27,9 +27,9 @@ class GPU; namespace Vulkan { +class Device; class StateTracker; class VKBlitScreen; -class VKDevice; class VKMemoryManager; class VKSwapchain; class VKScheduler; @@ -56,11 +56,7 @@ public: static std::vector<std::string> EnumerateDevices(); private: - bool CreateDebugCallback(); - - bool CreateSurface(); - - bool PickDevices(); + void InitializeDevice(); void Report() const; @@ -72,14 +68,13 @@ private: vk::InstanceDispatch dld; vk::Instance instance; - u32 instance_version{}; vk::SurfaceKHR surface; VKScreenInfo screen_info; - vk::DebugCallback debug_callback; - std::unique_ptr<VKDevice> device; + vk::DebugUtilsMessenger debug_callback; + std::unique_ptr<Device> device; std::unique_ptr<VKMemoryManager> memory_manager; std::unique_ptr<StateTracker> state_tracker; std::unique_ptr<VKScheduler> scheduler; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index d3a83f22f..5e184eb42 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -21,15 +21,15 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -114,7 +114,7 @@ struct VKBlitScreen::BufferData { VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, - VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, + VideoCore::RasterizerInterface& rasterizer_, const Device& device_, VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, VKScheduler& scheduler_, const VKScreenInfo& screen_info_) : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 2ee374247..69ed61770 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -7,7 +7,7 @@ #include <memory> #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; @@ -33,8 +33,8 @@ namespace Vulkan { struct ScreenInfo; +class Device; class RasterizerVulkan; -class VKDevice; class VKScheduler; class VKSwapchain; @@ -42,7 +42,7 @@ class VKBlitScreen final { public: explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VideoCore::RasterizerInterface& rasterizer, const Device& device, VKMemoryManager& memory_manager, VKSwapchain& swapchain, VKScheduler& scheduler, const VKScreenInfo& screen_info); ~VKBlitScreen(); @@ -85,7 +85,7 @@ private: Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; VideoCore::RasterizerInterface& rasterizer; - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; VKSwapchain& swapchain; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 10d296c2f..4d517c547 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -9,10 +9,10 @@ #include "core/core.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -34,13 +34,13 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; -std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { +std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const Device& device, VKScheduler& scheduler) { return std::make_unique<VKStreamBuffer>(device, scheduler); } } // Anonymous namespace -Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, +Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ staging_pool_} { @@ -168,7 +168,7 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const VKDevice& device_, VKMemoryManager& memory_manager_, + const Device& device_, VKMemoryManager& memory_manager_, VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, VKStagingBufferPool& staging_pool_) : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index daf498222..1c39aed34 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -11,17 +11,17 @@ #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKMemoryManager; class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: - explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, + explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); ~Buffer(); @@ -41,7 +41,7 @@ public: } private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -52,7 +52,7 @@ class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VK public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const VKDevice& device, VKMemoryManager& memory_manager, + const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStreamBuffer& stream_buffer, VKStagingBufferPool& staging_pool); ~VKBufferCache(); @@ -63,7 +63,7 @@ protected: std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; private: - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 8f7d6410e..a99df9323 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -5,8 +5,8 @@ #include <cstddef> #include "video_core/renderer_vulkan/vk_command_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -17,7 +17,7 @@ struct CommandPool::Pool { vk::CommandBuffers cmdbufs; }; -CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_) +CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_) : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {} CommandPool::~CommandPool() = default; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index 62a7ce3f1..61c26a22a 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -8,16 +8,16 @@ #include <vector> #include "video_core/renderer_vulkan/vk_resource_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +class Device; class MasterSemaphore; -class VKDevice; class CommandPool final : public ResourcePool { public: - explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_); + explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_); ~CommandPool() override; void Allocate(size_t begin, size_t end) override; @@ -27,7 +27,7 @@ public: private: struct Pool; - const VKDevice& device; + const Device& device; std::vector<Pool> pools; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2c030e910..02a6d54b7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -15,11 +15,11 @@ #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -86,7 +86,7 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { } // Anonymous namespace -VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, +VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, vk::Span<VkDescriptorSetLayoutBinding> bindings, vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, @@ -162,7 +162,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return set; } -QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, +QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -211,7 +211,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 return {*buffer.handle, 0}; } -Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_, +Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), @@ -255,7 +255,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff return {*buffer.handle, 0}; } -QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, +QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index abdf61e2c..7ddb09afb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -11,18 +11,18 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; class VKStagingBufferPool; class VKUpdateDescriptorQueue; class VKComputePass { public: - explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, + explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, vk::Span<VkDescriptorSetLayoutBinding> bindings, vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); @@ -43,7 +43,7 @@ private: class QuadArrayPass final : public VKComputePass { public: - explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, + explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); @@ -59,7 +59,7 @@ private: class Uint8Pass final : public VKComputePass { public: - explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, + explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); @@ -75,7 +75,7 @@ private: class QuadIndexedPass final : public VKComputePass { public: - explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, + explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 62f44d6da..3a48219b7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,16 +6,16 @@ #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, +VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, const SPIRVShader& shader_) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 49e2113a2..7e16575ac 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -7,17 +7,17 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; class VKUpdateDescriptorQueue; class VKComputePipeline final { public: - explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, + explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, const SPIRVShader& shader_); @@ -48,7 +48,7 @@ private: vk::Pipeline CreatePipeline() const; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; ShaderEntries entries; diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index f38e089d5..ef9fb5910 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -6,10 +6,10 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -32,7 +32,7 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); } -VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler) +VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ AllocateNewPool()} {} diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 544f32a20..f892be7be 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -7,11 +7,11 @@ #include <vector> #include "video_core/renderer_vulkan/vk_resource_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKDescriptorPool; class VKScheduler; @@ -39,7 +39,7 @@ class VKDescriptorPool final { friend DescriptorAllocator; public: - explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler); + explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); ~VKDescriptorPool(); VKDescriptorPool(const VKDescriptorPool&) = delete; @@ -50,7 +50,7 @@ private: vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); - const VKDevice& device; + const Device& device; MasterSemaphore& master_semaphore; std::vector<vk::DescriptorPool> pools; diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 774a12a53..6cd00884d 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -3,24 +3,21 @@ // Refer to the license.txt file included. #include <memory> -#include <thread> #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, - bool is_stubbed_) - : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} +InnerFence::InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_) + : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} -InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, - u32 payload_, bool is_stubbed_) - : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} +InnerFence::InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_) + : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} InnerFence::~InnerFence() = default; @@ -28,63 +25,38 @@ void InnerFence::Queue() { if (is_stubbed) { return; } - ASSERT(!event); - - event = device.GetLogical().CreateEvent(); - ticks = scheduler.CurrentTick(); - - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) { - cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); - }); + // Get the current tick so we can wait for it + wait_tick = scheduler.CurrentTick(); + scheduler.Flush(); } bool InnerFence::IsSignaled() const { if (is_stubbed) { return true; } - ASSERT(event); - return IsEventSignalled(); + return scheduler.IsFree(wait_tick); } void InnerFence::Wait() { if (is_stubbed) { return; } - ASSERT(event); - - if (ticks >= scheduler.CurrentTick()) { - scheduler.Flush(); - } - while (!IsEventSignalled()) { - std::this_thread::yield(); - } -} - -bool InnerFence::IsEventSignalled() const { - switch (const VkResult result = event.GetStatus()) { - case VK_EVENT_SET: - return true; - case VK_EVENT_RESET: - return false; - default: - throw vk::Exception(result); - } + scheduler.Wait(wait_tick); } VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, - const VKDevice& device_, VKScheduler& scheduler_) + VKScheduler& scheduler_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, - device{device_}, scheduler{scheduler_} {} + scheduler{scheduler_} {} Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { - return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); + return std::make_shared<InnerFence>(scheduler, value, is_stubbed); } Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { - return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed); + return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); } void VKFenceManager::QueueFence(Fence& fence) { diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index c2869e8e3..9c5e5aa8f 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -9,7 +9,7 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; @@ -21,17 +21,15 @@ class RasterizerInterface; namespace Vulkan { +class Device; class VKBufferCache; -class VKDevice; class VKQueryCache; class VKScheduler; class InnerFence : public VideoCommon::FenceBase { public: - explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, - bool is_stubbed_); - explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, - u32 payload_, bool is_stubbed_); + explicit InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_); + explicit InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_); ~InnerFence(); void Queue(); @@ -41,12 +39,8 @@ public: void Wait(); private: - bool IsEventSignalled() const; - - const VKDevice& device; VKScheduler& scheduler; - vk::Event event; - u64 ticks = 0; + u64 wait_tick = 0; }; using Fence = std::shared_ptr<InnerFence>; @@ -58,7 +52,7 @@ public: explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, - const VKDevice& device_, VKScheduler& scheduler_); + VKScheduler& scheduler_); protected: Fence CreateFence(u32 value, bool is_stubbed) override; @@ -68,7 +62,6 @@ protected: void WaitFence(Fence& fence) override; private: - const VKDevice& device; VKScheduler& scheduler; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7979df3a8..a5214d0bc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -12,12 +12,12 @@ #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -94,7 +94,7 @@ VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { } // Anonymous namespace -VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, +VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, const GraphicsPipelineCacheKey& key, @@ -212,11 +212,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, // state is ignored dynamic.raw1 = 0; dynamic.raw2 = 0; - for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { - // Enable all vertex bindings - binding.raw = 0; - binding.enabled.Assign(1); - } + dynamic.vertex_strides.fill(0); } else { dynamic = state.dynamic_state; } @@ -224,19 +220,16 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const auto& binding = dynamic.vertex_bindings[index]; - if (!binding.enabled) { + if (state.attributes[index].binding_index_enabled == 0) { continue; } const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ .binding = static_cast<u32>(index), - .stride = binding.stride, + .stride = dynamic.vertex_strides[index], .inputRate = rate, }); - if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast<u32>(index), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 214d06b4c..8b6a98fe0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -13,7 +13,7 @@ #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -40,8 +40,8 @@ static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey> static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); +class Device; class VKDescriptorPool; -class VKDevice; class VKScheduler; class VKUpdateDescriptorQueue; @@ -49,7 +49,7 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt class VKGraphicsPipeline final { public: - explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, + explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, const GraphicsPipelineCacheKey& key, @@ -85,7 +85,7 @@ private: vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, u32 num_color_buffers) const; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; const GraphicsPipelineCacheKey cache_key; const u64 hash; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index ae26e558d..56ec5e380 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -6,15 +6,15 @@ #include <chrono> #include "core/settings.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { using namespace std::chrono_literals; -MasterSemaphore::MasterSemaphore(const VKDevice& device) { +MasterSemaphore::MasterSemaphore(const Device& device) { static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0e93706d7..f336f1862 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -8,15 +8,15 @@ #include <thread> #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class MasterSemaphore { public: - explicit MasterSemaphore(const VKDevice& device); + explicit MasterSemaphore(const Device& device); ~MasterSemaphore(); /// Returns the current logical tick. diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 56b24b70f..a6abd0eee 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -11,9 +11,9 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -29,7 +29,7 @@ u64 GetAllocationChunkSize(u64 required_size) { class VKMemoryAllocation final { public: - explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_, + explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_, VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) : device{device_}, memory{std::move(memory_)}, properties{properties_}, allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} @@ -104,7 +104,7 @@ private: return std::nullopt; } - const VKDevice& device; ///< Vulkan device. + const Device& device; ///< Vulkan device. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const VkMemoryPropertyFlags properties; ///< Vulkan properties. const u64 allocation_size; ///< Size of this allocation. @@ -117,7 +117,7 @@ private: std::vector<const VKMemoryCommitImpl*> commits; }; -VKMemoryManager::VKMemoryManager(const VKDevice& device_) +VKMemoryManager::VKMemoryManager(const Device& device_) : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} VKMemoryManager::~VKMemoryManager() = default; @@ -207,7 +207,7 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi return {}; } -VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, +VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, const vk::DeviceMemory& memory_, u64 begin_, u64 end_) : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 318f8b43e..2452bca4e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -9,12 +9,12 @@ #include <utility> #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +class Device; class MemoryMap; -class VKDevice; class VKMemoryAllocation; class VKMemoryCommitImpl; @@ -22,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; class VKMemoryManager final { public: - explicit VKMemoryManager(const VKDevice& device_); + explicit VKMemoryManager(const Device& device_); VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); @@ -49,7 +49,7 @@ private: VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, VkMemoryPropertyFlags wanted_properties); - const VKDevice& device; ///< Device handler. + const Device& device; ///< Device handler. const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. }; @@ -59,7 +59,7 @@ class VKMemoryCommitImpl final { friend MemoryMap; public: - explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, + explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, const vk::DeviceMemory& memory_, u64 begin_, u64 end_); ~VKMemoryCommitImpl(); @@ -85,7 +85,7 @@ private: /// Unmaps memory. void Unmap() const; - const VKDevice& device; ///< Vulkan device. + const Device& device; ///< Vulkan device. const vk::DeviceMemory& memory; ///< Vulkan device memory handler. std::pair<u64, u64> interval{}; ///< Interval where the commit exists. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 083796d05..02282e36f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,17 +19,17 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -149,7 +149,7 @@ Shader::~Shader() = default; VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index fbaa8257c..89d635a3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -20,12 +20,12 @@ #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; @@ -33,10 +33,10 @@ class System; namespace Vulkan { +class Device; class RasterizerVulkan; class VKComputePipeline; class VKDescriptorPool; -class VKDevice; class VKScheduler; class VKUpdateDescriptorQueue; @@ -121,7 +121,7 @@ public: explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const VKDevice& device, + Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue); ~VKPipelineCache() override; @@ -148,7 +148,7 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 038760de3..7cadd5147 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -7,11 +7,11 @@ #include <utility> #include <vector> -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -27,7 +27,7 @@ constexpr VkQueryType GetTarget(QueryType type) { } // Anonymous namespace -QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_) +QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_) : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} QueryPool::~QueryPool() = default; @@ -68,7 +68,7 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - const VKDevice& device_, VKScheduler& scheduler_) + const Device& device_, VKScheduler& scheduler_) : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, query_pools{ QueryPool{device_, scheduler_, QueryType::SamplesPassed}, @@ -96,9 +96,9 @@ void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_, QueryType type_) : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, - query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { - const vk::Device* logical = &cache_.Device().GetLogical(); - cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { + query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { + const vk::Device* logical = &cache.GetDevice().GetLogical(); + cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); }); @@ -109,17 +109,17 @@ HostCounter::~HostCounter() { } void HostCounter::EndQuery() { - cache.Scheduler().Record( + cache.GetScheduler().Record( [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.Scheduler().CurrentTick()) { - cache.Scheduler().Flush(); + if (tick >= cache.GetScheduler().CurrentTick()) { + cache.GetScheduler().Flush(); } u64 data; - const VkResult query_result = cache.Device().GetLogical().GetQueryResults( + const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); @@ -127,7 +127,7 @@ u64 HostCounter::BlockingQuery() const { case VK_SUCCESS: return data; case VK_ERROR_DEVICE_LOST: - cache.Device().ReportLoss(); + cache.GetDevice().ReportLoss(); [[fallthrough]]; default: throw vk::Exception(query_result); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 837fe9ebf..7190946b9 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -12,7 +12,7 @@ #include "common/common_types.h" #include "video_core/query_cache.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace VideoCore { class RasterizerInterface; @@ -21,8 +21,8 @@ class RasterizerInterface; namespace Vulkan { class CachedQuery; +class Device; class HostCounter; -class VKDevice; class VKQueryCache; class VKScheduler; @@ -30,7 +30,7 @@ using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; class QueryPool final : public ResourcePool { public: - explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type); + explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type); ~QueryPool() override; std::pair<VkQueryPool, u32> Commit(); @@ -43,7 +43,7 @@ protected: private: static constexpr std::size_t GROW_STEP = 512; - const VKDevice& device; + const Device& device; const VideoCore::QueryType type; std::vector<vk::QueryPool> pools; @@ -55,23 +55,23 @@ class VKQueryCache final public: explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - const VKDevice& device_, VKScheduler& scheduler_); + const Device& device_, VKScheduler& scheduler_); ~VKQueryCache(); std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); - const VKDevice& Device() const noexcept { + const Device& GetDevice() const noexcept { return device; } - VKScheduler& Scheduler() const noexcept { + VKScheduler& GetScheduler() const noexcept { return scheduler; } private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 04c5c859c..ce3db49bd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -27,7 +27,6 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -36,9 +35,10 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader_cache.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -62,7 +62,7 @@ namespace { constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); -VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) { +VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; @@ -239,7 +239,7 @@ public: index.type = type; } - void Bind(const VKDevice& device, VKScheduler& scheduler) const { + void Bind(const Device& device, VKScheduler& scheduler) const { // Use this large switch case to avoid dispatching more memory in the record lambda than // what we need. It looks horrible, but it's the best we can do on standard C++. switch (vertex.num_buffers) { @@ -330,7 +330,7 @@ private: } index; template <size_t N> - void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { + void BindStatic(const Device& device, VKScheduler& scheduler) const { if (device.IsExtExtendedDynamicStateSupported()) { if (index.buffer) { BindStatic<N, true, true>(scheduler); @@ -409,7 +409,7 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, - const VKDevice& device_, VKMemoryManager& memory_manager_, + const Device& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_) : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, @@ -428,8 +428,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, staging_pool), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, - fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, - scheduler), + fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { scheduler.SetQueryCache(query_cache); if (device.UseAsynchronousShaders()) { @@ -628,8 +627,10 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET, - descriptor_set, {}); + if (descriptor_set) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + DESCRIPTOR_SET, descriptor_set, nullptr); + } cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 990f9e031..4695718e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -29,8 +29,8 @@ #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; @@ -55,7 +55,7 @@ class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - VKScreenInfo& screen_info_, const VKDevice& device_, + VKScreenInfo& screen_info_, const Device& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_); ~RasterizerVulkan() override; @@ -212,7 +212,7 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; VKScreenInfo& screen_info; - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; StateTracker& state_tracker; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c104c6fe3..66004f9c0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -11,13 +11,13 @@ #include "common/microprofile.h" #include "common/thread.h" #include "video_core/renderer_vulkan/vk_command_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -37,7 +37,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { last = nullptr; } -VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_) +VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) : device{device_}, state_tracker{state_tracker_}, master_semaphore{std::make_unique<MasterSemaphore>(device)}, command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 0a36c8fad..4cd43e425 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -12,22 +12,22 @@ #include <utility> #include "common/common_types.h" #include "common/threadsafe_queue.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class CommandPool; +class Device; class Framebuffer; class MasterSemaphore; class StateTracker; -class VKDevice; class VKQueryCache; /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class VKScheduler { public: - explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker); + explicit VKScheduler(const Device& device, StateTracker& state_tracker); ~VKScheduler(); /// Returns the current command buffer tick. @@ -179,7 +179,7 @@ private: void AcquireNewChunk(); - const VKDevice& device; + const Device& device; StateTracker& state_tracker; std::unique_ptr<MasterSemaphore> master_semaphore; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 09d6f9f35..89cbe01ad 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -22,11 +22,11 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader/transform_feedback.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { @@ -272,19 +272,12 @@ bool IsPrecise(Operation operand) { return false; } -u32 ShaderVersion(const VKDevice& device) { - if (device.InstanceApiVersion() < VK_API_VERSION_1_1) { - return 0x00010000; - } - return 0x00010300; -} - class SPIRVDecompiler final : public Sirit::Module { public: - explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, + explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, const Registry& registry_, const Specialization& specialization_) - : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_}, - header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { + : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, + registry{registry_}, specialization{specialization_} { if (stage_ != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); } @@ -2749,7 +2742,7 @@ private: }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); - const VKDevice& device; + const Device& device; const ShaderIR& ir; const ShaderType stage; const Tegra::Shader::Header header; @@ -3137,7 +3130,7 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { return entries; } -std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, +std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, ShaderType stage, const VideoCommon::Shader::Registry& registry, const Specialization& specialization) { return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index ad91ad5de..26381e444 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -15,10 +15,8 @@ #include "video_core/shader/shader_ir.h" namespace Vulkan { -class VKDevice; -} -namespace Vulkan { +class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; @@ -109,7 +107,7 @@ struct SPIRVShader { ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); -std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, +std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, Tegra::Engines::ShaderType stage, const VideoCommon::Shader::Registry& registry, const Specialization& specialization); diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 38a0be7f2..aaad4f292 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -7,13 +7,13 @@ #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) { +vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index dce34a140..9517cbe84 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -7,12 +7,12 @@ #include <span> #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; -vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code); +vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 2fd3b7f39..1e0b8b922 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -9,17 +9,17 @@ #include "common/bit_util.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) : buffer{std::move(buffer_)} {} -VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_, +VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_, VKScheduler& scheduler_) : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 2dd5049ac..90dadcbbe 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -10,11 +10,11 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; struct VKBuffer final { @@ -24,7 +24,7 @@ struct VKBuffer final { class VKStagingBufferPool final { public: - explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, + explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler); ~VKStagingBufferPool(); @@ -58,7 +58,7 @@ private: u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 419cb154d..a09fe084e 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -10,10 +10,10 @@ #include "common/alignment.h" #include "common/assert.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -60,7 +60,7 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_) +VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} { CreateBuffers(); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 1428f77bf..2e9c8cb46 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -9,17 +9,17 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKFenceWatch; class VKScheduler; class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler); + explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler); ~VKStreamBuffer(); /** @@ -54,7 +54,7 @@ private: void WaitPendingOperations(u64 requested_upper_bound); - const VKDevice& device; ///< Vulkan device manager. + const Device& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. vk::Buffer buffer; ///< Mapped buffer. diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 9636a7c65..725a2a05d 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -11,10 +11,10 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -56,7 +56,7 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi } // Anonymous namespace -VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_) +VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) : surface{surface_}, device{device_}, scheduler{scheduler_} {} VKSwapchain::~VKSwapchain() = default; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 6b39befdf..2eadd62b3 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -7,7 +7,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Layout { struct FramebufferLayout; @@ -15,12 +15,12 @@ struct FramebufferLayout; namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; class VKSwapchain { public: - explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler); + explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a given size. @@ -73,7 +73,7 @@ private: void Destroy(); const VkSurfaceKHR surface; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; vk::SwapchainKHR swapchain; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 261808391..bd11de012 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -10,11 +10,13 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -93,7 +95,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { } } -[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) { const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; if (info.type == ImageType::e2D && info.resources.layers >= 6 && @@ -146,14 +148,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { +[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { if (info.type == ImageType::Buffer) { return vk::Image{}; } return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { +[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { if (info.type != ImageType::Buffer) { return vk::Buffer{}; } @@ -205,7 +207,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, +[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, const ImageView* image_view) { const auto pixel_format = image_view->format; return VkAttachmentDescription{ diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index edc3d80c0..92a7aad8b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -8,8 +8,8 @@ #include <span> #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -19,11 +19,11 @@ using VideoCommon::Offset2D; using VideoCommon::RenderTargets; using VideoCore::Surface::PixelFormat; -class VKDevice; class VKScheduler; class VKStagingBufferPool; class BlitImageHelper; +class Device; class Image; class ImageView; class Framebuffer; @@ -68,7 +68,7 @@ struct ImageBufferMap { }; struct TextureCacheRuntime { - const VKDevice& device; + const Device& device; VKScheduler& scheduler; VKMemoryManager& memory_manager; VKStagingBufferPool& staging_buffer_pool; @@ -104,6 +104,11 @@ struct TextureCacheRuntime { } void InsertUploadMemoryBarrier() {} + + bool HasBrokenTextureViewFormats() const noexcept { + // No known Vulkan driver has broken image views + return false; + } }; class Image : public VideoCommon::ImageBase { @@ -177,7 +182,7 @@ public: private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); - const VKDevice* device = nullptr; + const Device* device = nullptr; std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; vk::ImageView depth_view; vk::ImageView stencil_view; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 8826da325..f99273c6a 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -7,14 +7,14 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_) +VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} {} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f098a8540..e214f7195 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -8,11 +8,11 @@ #include <boost/container/static_vector.hpp> #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; struct DescriptorUpdateEntry { @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { class VKUpdateDescriptorQueue final { public: - explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_); + explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_); ~VKUpdateDescriptorQueue(); void TickFrame(); @@ -69,7 +69,7 @@ public: } private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; const DescriptorUpdateEntry* upload_start = nullptr; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 09f93463b..9707136e9 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -134,7 +134,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, } void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, std::vector<VkDescriptorSetLayoutBinding> bindings, diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 004e214a8..0dbb1a31f 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -24,9 +24,9 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Core::Frontend { class EmuWindow; @@ -94,7 +94,7 @@ public: CompilerSettings compiler_settings, const Registry& registry, VAddr cpu_addr); - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, @@ -123,7 +123,7 @@ private: // For Vulkan Vulkan::VKPipelineCache* pp_cache; - const Vulkan::VKDevice* vk_device; + const Vulkan::Device* vk_device; Vulkan::VKScheduler* scheduler; Vulkan::VKDescriptorPool* descriptor_pool; Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 448a05fcc..959b3f115 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -120,7 +120,9 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i if (lhs.info.type == ImageType::Linear) { base = SubresourceBase{.level = 0, .layer = 0}; } else { - base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); + // We are passing relaxed formats as an option, having broken views or not won't matter + static constexpr bool broken_views = false; + base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); } if (!base) { LOG_ERROR(HW_GPU, "Image alias should have been flipped"); diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 076a4bcfd..18f72e508 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i .height = std::max(image_info.size.height >> range.base.level, 1u), .depth = std::max(image_info.size.depth >> range.base.level, 1u), } { - ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), + ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), "Image view format {} is incompatible with image format {}", info.format, image_info.format); const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 968059842..d1080300f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,7 +61,7 @@ using VideoCore::Surface::SurfaceType; template <class P> class TextureCache { /// Address shift for caching images into a hash table - static constexpr u64 PAGE_SHIFT = 20; + static constexpr u64 PAGE_BITS = 20; /// Enables debugging features to the texture cache static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; @@ -184,8 +184,8 @@ private: template <typename Func> static void ForEachPage(VAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; - for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { if constexpr (RETURNS_BOOL) { if (func(page)) { break; @@ -708,7 +708,7 @@ void TextureCache<P>::InvalidateDepthBuffer() { template <class P> typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); + const auto it = page_table.find(cpu_addr >> PAGE_BITS); if (it == page_table.end()) { return nullptr; } @@ -883,6 +883,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, if (!cpu_addr) { return ImageId{}; } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { @@ -892,11 +893,11 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && existing.pitch == info.pitch && IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format)) { + IsViewCompatible(existing.format, info.format, broken_views)) { image_id = existing_image_id; return true; } - } else if (IsSubresource(info, existing_image, gpu_addr, options)) { + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { image_id = existing_image_id; return true; } @@ -926,6 +927,7 @@ template <class P> ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); std::vector<ImageId> overlap_ids; std::vector<ImageId> left_aliased_ids; std::vector<ImageId> right_aliased_ids; @@ -940,7 +942,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } return; } - const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); + static constexpr bool strict_size = true; + const std::optional<OverlapResult> solution = + ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); if (solution) { gpu_addr = solution->gpu_addr; cpu_addr = solution->cpu_addr; @@ -950,9 +954,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options)) { + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { left_aliased_ids.push_back(overlap_id); - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views)) { right_aliased_ids.push_back(overlap_id); } }); @@ -1165,13 +1170,13 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); return; } std::vector<ImageId>& image_ids = page_it->second; const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT); + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); return; } image_ids.erase(vector_it); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 9ed1fc007..279932778 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1069,13 +1069,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, - bool strict_size) { + bool strict_size, bool broken_views) { ASSERT(new_info.type != ImageType::Linear); ASSERT(overlap.info.type != ImageType::Linear); if (!IsLayerStrideCompatible(new_info, overlap.info)) { return std::nullopt; } - if (!IsViewCompatible(overlap.info.format, new_info.format)) { + if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) { return std::nullopt; } if (gpu_addr == overlap.gpu_addr) { @@ -1118,14 +1118,15 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { } std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, - GPUVAddr candidate_addr, RelaxedOptions options) { + GPUVAddr candidate_addr, RelaxedOptions options, + bool broken_views) { const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); if (!base) { return std::nullopt; } const ImageInfo& existing = image.info; if (False(options & RelaxedOptions::Format)) { - if (!IsViewCompatible(existing.format, candidate.format)) { + if (!IsViewCompatible(existing.format, candidate.format, broken_views)) { return std::nullopt; } } @@ -1162,8 +1163,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const } bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, - RelaxedOptions options) { - return FindSubresource(candidate, image, candidate_addr, options).has_value(); + RelaxedOptions options, bool broken_views) { + return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); } void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index dbbbd33cd..52a9207d6 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -87,17 +87,19 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, - bool strict_size); + bool strict_size, bool broken_views); [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); [[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, - RelaxedOptions options); + RelaxedOptions options, + bool broken_views); [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, - GPUVAddr candidate_addr, RelaxedOptions options); + GPUVAddr candidate_addr, RelaxedOptions options, + bool broken_views); void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src); diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 5b01020ec..8d10ac29e 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -32,20 +32,11 @@ namespace Vulkan { static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; -NsightAftermathTracker::NsightAftermathTracker() = default; - -NsightAftermathTracker::~NsightAftermathTracker() { - if (initialized) { - (void)GFSDK_Aftermath_DisableGpuCrashDumps(); - } -} - -bool NsightAftermathTracker::Initialize() { +NsightAftermathTracker::NsightAftermathTracker() { if (!dl.Open(AFTERMATH_LIB_NAME)) { LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); - return false; + return; } - if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", &GFSDK_Aftermath_DisableGpuCrashDumps) || !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", @@ -64,27 +55,28 @@ bool NsightAftermathTracker::Initialize() { LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); return false; } - dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; - (void)Common::FS::DeleteDirRecursively(dump_dir); + void(Common::FS::DeleteDirRecursively(dump_dir)); if (!Common::FS::CreateDir(dump_dir)) { LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); - return false; + return; } - if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); - return false; + return; } - LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); - initialized = true; - return true; +} + +NsightAftermathTracker::~NsightAftermathTracker() { + if (initialized) { + (void)GFSDK_Aftermath_DisableGpuCrashDumps(); + } } void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index afe7ae99e..cee3847fb 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -34,8 +34,6 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - bool Initialize(); - void SaveShader(const std::vector<u32>& spirv) const; private: @@ -78,9 +76,6 @@ private: #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline bool NsightAftermathTracker::Initialize() { - return false; -} inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} #endif diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp new file mode 100644 index 000000000..ea7af8ad4 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> +#include "common/logging/log.h" +#include "video_core/vulkan_common/vulkan_debug_callback.h" + +namespace Vulkan { +namespace { +VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT type, + const VkDebugUtilsMessengerCallbackDataEXT* data, + [[maybe_unused]] void* user_data) { + const std::string_view message{data->pMessage}; + if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { + LOG_CRITICAL(Render_Vulkan, "{}", message); + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { + LOG_WARNING(Render_Vulkan, "{}", message); + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { + LOG_INFO(Render_Vulkan, "{}", message); + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { + LOG_DEBUG(Render_Vulkan, "{}", message); + } + return VK_FALSE; +} +} // Anonymous namespace + +vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { + return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = Callback, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h new file mode 100644 index 000000000..2efcd244c --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h @@ -0,0 +1,11 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85b4f0dff..75173324e 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -13,8 +13,9 @@ #include "common/assert.h" #include "core/settings.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/nsight_aftermath_tracker.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -206,17 +207,14 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( } // Anonymous namespace -VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, - VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) +Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { + format_properties{GetFormatProperties(physical, dld)} { + CheckSuitability(); SetupFamilies(surface); SetupFeatures(); -} - -VKDevice::~VKDevice() = default; -bool VKDevice::Create() { const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(); @@ -415,7 +413,7 @@ bool VKDevice::Create() { VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; if (nv_device_diagnostics_config) { - nsight_aftermath_tracker.Initialize(); + nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); diagnostics_nv = { .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, @@ -426,12 +424,7 @@ bool VKDevice::Create() { }; first_next = &diagnostics_nv; } - logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); - if (!logical) { - LOG_ERROR(Render_Vulkan, "Failed to create logical device"); - return false; - } CollectTelemetryParameters(); CollectToolingInfo(); @@ -455,11 +448,12 @@ bool VKDevice::Create() { present_queue = logical.GetQueue(present_family); use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); - return true; } -VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const { +Device::~Device() = default; + +VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { return wanted_format; } @@ -490,18 +484,20 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla return wanted_format; } -void VKDevice::ReportLoss() const { - LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); +void Device::ReportLoss() const { + LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); // Wait for the log to flush and for Nsight Aftermath to dump the results std::this_thread::sleep_for(std::chrono::seconds{15}); } -void VKDevice::SaveShader(const std::vector<u32>& spirv) const { - nsight_aftermath_tracker.SaveShader(spirv); +void Device::SaveShader(const std::vector<u32>& spirv) const { + if (nsight_aftermath_tracker) { + nsight_aftermath_tracker->SaveShader(spirv); + } } -bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { +bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { // Disable for now to avoid converting ASTC twice. static constexpr std::array astc_formats = { VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, @@ -535,7 +531,7 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) return true; } -bool VKDevice::TestDepthStencilBlits() const { +bool Device::TestDepthStencilBlits() const { static constexpr VkFormatFeatureFlags required_features = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; const auto test_features = [](VkFormatProperties props) { @@ -545,8 +541,8 @@ bool VKDevice::TestDepthStencilBlits() const { test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); } -bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const { +bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); @@ -556,64 +552,45 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa return (supported_usage & wanted_usage) == wanted_usage; } -bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { - bool is_suitable = true; +void Device::CheckSuitability() const { std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; - - for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) { + for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - const std::string_view name{prop.extensionName}; + const std::string_view name{property.extensionName}; available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; } } - if (!available_extensions.all()) { - for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { - if (available_extensions[i]) { - continue; - } - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); - is_suitable = false; - } - } - - bool has_graphics{}, has_present{}; - const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); - for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { - const auto& family = queue_family_properties[i]; - if (family.queueCount == 0) { + for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { + if (available_extensions[i]) { continue; } - has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT; - has_present |= physical.GetSurfaceSupportKHR(i, surface); - } - if (!has_graphics || !has_present) { - LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); - is_suitable = false; - } - - // TODO(Rodrigo): Check if the device matches all requeriments. - const auto properties{physical.GetProperties()}; - const auto& limits{properties.limits}; - - constexpr u32 required_ubo_size = 65536; - if (limits.maxUniformBufferRange < required_ubo_size) { - LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required", - limits.maxUniformBufferRange, required_ubo_size); - is_suitable = false; + LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); + throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - - constexpr u32 required_num_viewports = 16; - if (limits.maxViewports < required_num_viewports) { - LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required", - limits.maxViewports, required_num_viewports); - is_suitable = false; + struct LimitTuple { + u32 minimum; + u32 value; + const char* name; + }; + const VkPhysicalDeviceLimits& limits{properties.limits}; + const std::array limits_report{ + LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, + LimitTuple{16, limits.maxViewports, "maxViewports"}, + LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, + LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, + }; + for (const auto& tuple : limits_report) { + if (tuple.value < tuple.minimum) { + LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, + tuple.minimum, tuple.value); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } } - - const auto features{physical.GetFeatures()}; - const std::array feature_report = { + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + const std::array feature_report{ std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), @@ -631,22 +608,16 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), }; - for (const auto& [supported, name] : feature_report) { - if (supported) { + for (const auto& [is_supported, name] : feature_report) { + if (is_supported) { continue; } LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); - is_suitable = false; + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - - if (!is_suitable) { - LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName); - } - - return is_suitable; } -std::vector<const char*> VKDevice::LoadExtensions() { +std::vector<const char*> Device::LoadExtensions() { std::vector<const char*> extensions; extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); @@ -685,9 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); - if (instance_version >= VK_API_VERSION_1_1) { - test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - } + test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -801,39 +770,45 @@ std::vector<const char*> VKDevice::LoadExtensions() { return extensions; } -void VKDevice::SetupFamilies(VkSurfaceKHR surface) { - std::optional<u32> graphics_family_, present_family_; - +void Device::SetupFamilies(VkSurfaceKHR surface) { const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); - for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { - if (graphics_family_ && present_family_) + std::optional<u32> graphics; + std::optional<u32> present; + for (u32 index = 0; index < static_cast<u32>(queue_family_properties.size()); ++index) { + if (graphics && (present || !surface)) { break; - - const auto& queue_family = queue_family_properties[i]; - if (queue_family.queueCount == 0) + } + const VkQueueFamilyProperties& queue_family = queue_family_properties[index]; + if (queue_family.queueCount == 0) { continue; - + } if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { - graphics_family_ = i; + graphics = index; } - if (physical.GetSurfaceSupportKHR(i, surface)) { - present_family_ = i; + if (surface && physical.GetSurfaceSupportKHR(index, surface)) { + present = index; } } - ASSERT(graphics_family_ && present_family_); - - graphics_family = *graphics_family_; - present_family = *present_family_; + if (!graphics) { + LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue"); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + if (surface && !present) { + LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + graphics_family = *graphics; + present_family = *present; } -void VKDevice::SetupFeatures() { +void Device::SetupFeatures() { const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } -void VKDevice::CollectTelemetryParameters() { +void Device::CollectTelemetryParameters() { VkPhysicalDeviceDriverPropertiesKHR driver{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, .pNext = nullptr, @@ -860,7 +835,7 @@ void VKDevice::CollectTelemetryParameters() { } } -void VKDevice::CollectToolingInfo() { +void Device::CollectToolingInfo() { if (!ext_tooling_info) { return; } @@ -886,7 +861,7 @@ void VKDevice::CollectToolingInfo() { } } -std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { +std::vector<VkDeviceQueueCreateInfo> Device::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; std::unordered_set<u32> unique_queue_families{graphics_family, present_family}; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/vulkan_common/vulkan_device.h index 995dcfc0f..a973c3ce4 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -10,11 +10,12 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +class NsightAftermathTracker; + /// Format usage descriptor. enum class FormatType { Linear, Optimal, Buffer }; @@ -22,14 +23,11 @@ enum class FormatType { Linear, Optimal, Buffer }; const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. -class VKDevice final { +class Device final { public: - explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical, - VkSurfaceKHR surface, const vk::InstanceDispatch& dld); - ~VKDevice(); - - /// Initializes the device. Returns true on success. - bool Create(); + explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld); + ~Device(); /** * Returns a format supported by the device for the passed requeriments. @@ -82,11 +80,6 @@ public: return present_family; } - /// Returns the current instance Vulkan API version in Vulkan-formatted version numbers. - u32 InstanceApiVersion() const { - return instance_version; - } - /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. u32 ApiVersion() const { return properties.apiVersion; @@ -232,10 +225,10 @@ public: return use_asynchronous_shaders; } +private: /// Checks if the physical device is suitable. - static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); + void CheckSuitability() const; -private: /// Loads extensions into a vector and stores available ones in this object. std::vector<const char*> LoadExtensions(); @@ -308,7 +301,7 @@ private: std::unordered_map<VkFormat, VkFormatProperties> format_properties; /// Nsight Aftermath GPU crash tracker - NsightAftermathTracker nsight_aftermath_tracker; + std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp new file mode 100644 index 000000000..889ecda0c --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -0,0 +1,151 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <optional> +#include <span> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "common/dynamic_library.h" +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +// Include these late to avoid polluting previous headers +#ifdef _WIN32 +#include <windows.h> +// ensure include order +#include <vulkan/vulkan_win32.h> +#endif + +#if !defined(_WIN32) && !defined(__APPLE__) +#include <X11/Xlib.h> +#include <vulkan/vulkan_wayland.h> +#include <vulkan/vulkan_xlib.h> +#endif + +namespace Vulkan { +namespace { +[[nodiscard]] std::vector<const char*> RequiredExtensions( + Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { + std::vector<const char*> extensions; + extensions.reserve(6); + switch (window_type) { + case Core::Frontend::WindowSystemType::Headless: + break; +#ifdef _WIN32 + case Core::Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#endif +#if !defined(_WIN32) && !defined(__APPLE__) + case Core::Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; + case Core::Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + if (window_type != Core::Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + if (enable_debug_utils) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + return extensions; +} + +[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, + std::span<const char* const> extensions) { + const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); + if (!properties) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return false; + } + for (const char* extension : extensions) { + const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { + return std::strcmp(extension, prop.extensionName) == 0; + }); + if (it == properties->end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); + return false; + } + } + return true; +} + +[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { + std::vector<const char*> layers; + if (enable_layers) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } + return layers; +} + +void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const char*>& layers) { + const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); + if (!layer_properties) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); + layers.clear(); + } + std::erase_if(layers, [&layer_properties](const char* layer) { + const auto comp = [layer](const VkLayerProperties& layer_property) { + return std::strcmp(layer, layer_property.layerName) == 0; + }; + const auto it = std::ranges::find_if(*layer_properties, comp); + if (it == layer_properties->end()) { + LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); + return true; + } + return false; + }); +} +} // Anonymous namespace + +vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + u32 required_version, Core::Frontend::WindowSystemType window_type, + bool enable_debug_utils, bool enable_layers) { + if (!library.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Vulkan library not available"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { + LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + if (!vk::Load(dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); + if (!AreExtensionsSupported(dld, extensions)) { + throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); + } + std::vector<const char*> layers = Layers(enable_layers); + RemoveUnavailableLayers(dld, layers); + + const u32 available_version = vk::AvailableVersion(dld); + if (available_version < required_version) { + LOG_ERROR(Render_Vulkan, "Vulkan {}.{} is not supported, {}.{} is required", + VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version), + VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); + throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); + } + vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); + if (!vk::Load(*instance, dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + return instance; +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h new file mode 100644 index 000000000..e5e3a7144 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_instance.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "common/dynamic_library.h" +#include "core/frontend/emu_window.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +/** + * Create a Vulkan instance + * + * @param library Dynamic library to load the Vulkan instance from + * @param dld Dispatch table to load function pointers into + * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) + * @param window_type Window system type's enabled extension + * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not + * @param enable_layers Whether to enable Vulkan validation layers or not + * + * @return A new Vulkan instance + * @throw vk::Exception on failure + */ +[[nodiscard]] vk::Instance CreateInstance( + const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, + Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, + bool enable_debug_utils = false, bool enable_layers = false); + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp new file mode 100644 index 000000000..557871d81 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_library.cpp @@ -0,0 +1,36 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstdlib> +#include <string> + +#include "common/dynamic_library.h" +#include "common/file_util.h" +#include "video_core/vulkan_common/vulkan_library.h" + +namespace Vulkan { + +Common::DynamicLibrary OpenLibrary() { + Common::DynamicLibrary library; +#ifdef __APPLE__ + // Check if a path to a specific Vulkan library has been specified. + char* const libvulkan_env = std::getenv("LIBVULKAN_PATH"); + if (!libvulkan_env || !library.Open(libvulkan_env)) { + // Use the libvulkan.dylib from the application bundle. + const std::string filename = + Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + void(library.Open(filename.c_str())); + } +#else + std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); + if (!library.Open(filename.c_str())) { + // Android devices may not have libvulkan.so.1, only libvulkan.so. + filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); + void(library.Open(filename.c_str())); + } +#endif + return library; +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h new file mode 100644 index 000000000..8b28b0e17 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_library.h @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/dynamic_library.h" + +namespace Vulkan { + +Common::DynamicLibrary OpenLibrary(); + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp new file mode 100644 index 000000000..3c3238f96 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_surface.cpp @@ -0,0 +1,81 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/vulkan_common/vulkan_surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +// Include these late to avoid polluting previous headers +#ifdef _WIN32 +#include <windows.h> +// ensure include order +#include <vulkan/vulkan_win32.h> +#endif + +#if !defined(_WIN32) && !defined(__APPLE__) +#include <X11/Xlib.h> +#include <vulkan/vulkan_wayland.h> +#include <vulkan/vulkan_xlib.h> +#endif + +namespace Vulkan { + +vk::SurfaceKHR CreateSurface(const vk::Instance& instance, + const Core::Frontend::EmuWindow& emu_window) { + [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); + [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo(); + VkSurfaceKHR unsafe_surface = nullptr; + +#ifdef _WIN32 + if (window_info.type == Core::Frontend::WindowSystemType::Windows) { + const HWND hWnd = static_cast<HWND>(window_info.render_surface); + const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + nullptr, 0, nullptr, hWnd}; + const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); + if (!vkCreateWin32SurfaceKHR || + vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } +#endif +#if !defined(_WIN32) && !defined(__APPLE__) + if (window_info.type == Core::Frontend::WindowSystemType::X11) { + const VkXlibSurfaceCreateInfoKHR xlib_ci{ + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<Display*>(window_info.display_connection), + reinterpret_cast<Window>(window_info.render_surface)}; + const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); + if (!vkCreateXlibSurfaceKHR || + vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } + if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { + const VkWaylandSurfaceCreateInfoKHR wayland_ci{ + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<wl_display*>(window_info.display_connection), + static_cast<wl_surface*>(window_info.render_surface)}; + const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); + if (!vkCreateWaylandSurfaceKHR || + vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != + VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } +#endif + if (!unsafe_surface) { + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + return vk::SurfaceKHR(unsafe_surface, *instance, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h new file mode 100644 index 000000000..05a169e32 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_surface.h @@ -0,0 +1,18 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Core::Frontend { +class EmuWindow; +} + +namespace Vulkan { + +[[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance, + const Core::Frontend::EmuWindow& emu_window); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2a21e850d..5e15ad607 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -13,7 +13,7 @@ #include "common/common_types.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::vk { @@ -435,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe } Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions, - InstanceDispatch& dispatch) noexcept { + InstanceDispatch& dispatch) { const VkApplicationInfo application_info{ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = nullptr, @@ -455,55 +455,30 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char .enabledExtensionCount = extensions.size(), .ppEnabledExtensionNames = extensions.data(), }; - VkInstance instance; - if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { - // Failed to create the instance. - return {}; - } + Check(dispatch.vkCreateInstance(&ci, nullptr, &instance)); if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { // We successfully created an instance but the destroy function couldn't be loaded. // This is a good moment to panic. - return {}; + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } - return Instance(instance, dispatch); } -std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { +std::vector<VkPhysicalDevice> Instance::EnumeratePhysicalDevices() const { u32 num; - if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { - return std::nullopt; - } + Check(dld->vkEnumeratePhysicalDevices(handle, &num, nullptr)); std::vector<VkPhysicalDevice> physical_devices(num); - if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { - return std::nullopt; - } + Check(dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data())); SortPhysicalDevices(physical_devices, *dld); - return std::make_optional(std::move(physical_devices)); + return physical_devices; } -DebugCallback Instance::TryCreateDebugCallback( - PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - const VkDebugUtilsMessengerCreateInfoEXT ci{ - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, - .pNext = nullptr, - .flags = 0, - .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, - .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, - .pfnUserCallback = callback, - .pUserData = nullptr, - }; - - VkDebugUtilsMessengerEXT messenger; - if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { - return {}; - } - return DebugCallback(messenger, handle, *dld); +DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( + const VkDebugUtilsMessengerCreateInfoEXT& create_info) const { + VkDebugUtilsMessengerEXT object; + Check(dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object)); + return DebugUtilsMessenger(object, handle, *dld); } void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { @@ -605,7 +580,7 @@ void Semaphore::SetObjectNameEXT(const char* name) const { Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, - DeviceDispatch& dispatch) noexcept { + DeviceDispatch& dispatch) { const VkDeviceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = next, @@ -618,11 +593,8 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate .ppEnabledExtensionNames = enabled_extensions.data(), .pEnabledFeatures = nullptr, }; - VkDevice device; - if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { - return {}; - } + Check(dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device)); Load(device, dispatch); return Device(device, dispatch); } diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index f9a184e00..912cab46c 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -555,7 +555,7 @@ private: const DeviceDispatch* dld = nullptr; }; -using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; +using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; @@ -573,16 +573,25 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> { using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; public: - /// Creates a Vulkan instance. Use "operator bool" for error handling. + /// Creates a Vulkan instance. + /// @throw Exception on initialization error. static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions, - InstanceDispatch& dispatch) noexcept; + InstanceDispatch& dispatch); /// Enumerates physical devices. /// @return Physical devices and an empty handle on failure. - std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices(); + /// @throw Exception on Vulkan error. + std::vector<VkPhysicalDevice> EnumeratePhysicalDevices() const; - /// Tries to create a debug callback messenger. Returns an empty handle on failure. - DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; + /// Creates a debug callback messenger. + /// @throw Exception on creation failure. + DebugUtilsMessenger CreateDebugUtilsMessenger( + const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; + + /// Returns dispatch table. + const InstanceDispatch& Dispatch() const noexcept { + return *dld; + } }; class Queue { @@ -787,7 +796,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { public: static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, - DeviceDispatch& dispatch) noexcept; + DeviceDispatch& dispatch); Queue GetQueue(u32 family_index) const noexcept; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index e124836b5..85ee2577d 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -397,7 +397,7 @@ void GRenderWindow::mousePressEvent(QMouseEvent* event) { this->TouchPressed(x, y); } - QWidget::mousePressEvent(event); + emit MouseActivity(); } void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { @@ -411,7 +411,7 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { input_subsystem->GetMouse()->MouseMove(x, y); this->TouchMoved(x, y); - QWidget::mouseMoveEvent(event); + emit MouseActivity(); } void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) { @@ -688,3 +688,10 @@ void GRenderWindow::showEvent(QShowEvent* event) { connect(windowHandle(), &QWindow::screenChanged, this, &GRenderWindow::OnFramebufferSizeChanged, Qt::UniqueConnection); } + +bool GRenderWindow::eventFilter(QObject* object, QEvent* event) { + if (event->type() == QEvent::HoverMove) { + emit MouseActivity(); + } + return false; +} diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index ebe5cb965..339095509 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -184,6 +184,7 @@ signals: void Closed(); void FirstFrameDisplayed(); void ExecuteProgramSignal(std::size_t program_index); + void MouseActivity(); private: void TouchBeginEvent(const QTouchEvent* event); @@ -216,4 +217,5 @@ private: protected: void showEvent(QShowEvent* event) override; + bool eventFilter(QObject* object, QEvent* event) override; }; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 34c2a5f8b..cda448718 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -514,7 +514,7 @@ void Config::ReadControlValues() { Settings::values.emulate_analog_keyboard = ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); - ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false); + ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true); ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), true); ReadSettingGlobal(Settings::values.enable_accurate_vibrations, @@ -764,6 +764,8 @@ void Config::ReadCpuValues() { ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool(); Settings::values.cpuopt_unsafe_reduce_fp_error = ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool(); + Settings::values.cpuopt_unsafe_inaccurate_nan = + ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool(); } qt_config->endGroup(); @@ -1174,7 +1176,7 @@ void Config::SaveControlValues() { SaveTouchscreenValues(); SaveMotionTouchValues(); - WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); + WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, true); WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled, true); WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"), @@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() { Settings::values.cpuopt_unsafe_unfuse_fma, true); WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), Settings::values.cpuopt_unsafe_reduce_fp_error, true); + WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), + Settings::values.cpuopt_unsafe_inaccurate_nan, true); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 37fcd6adc..d055cbd60 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() { ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error); + ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); + ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan); } void ConfigureCpu::AccuracyUpdated(int index) { @@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() { static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex()); Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked(); Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked(); + Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked(); } void ConfigureCpu::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index ebdd2e6e9..bcd0962e9 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -109,6 +109,18 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan"> + <property name="text"> + <string>Inaccurate NaN handling</string> + </property> + <property name="toolTip"> + <string> + <div>This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.</div> + </string> + </property> + </widget> + </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp index c2a7113da..eb8eacbf9 100644 --- a/src/yuzu/configuration/configure_motion_touch.cpp +++ b/src/yuzu/configuration/configure_motion_touch.cpp @@ -51,6 +51,8 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent, case CalibrationConfigurationJob::Status::Completed: text = tr("Configuration completed!"); break; + default: + break; } QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text)); if (status == CalibrationConfigurationJob::Status::Completed) { diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 0925c10b4..a93b5d3c2 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -14,10 +14,10 @@ #include "core/core.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/k_scheduler.h" -#include "core/hle/kernel/mutex.h" +#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" -#include "core/hle/kernel/synchronization_object.h" +#include "core/hle/kernel/svc_common.h" #include "core/hle/kernel/thread.h" #include "core/memory.h" @@ -116,7 +116,7 @@ QString WaitTreeText::GetText() const { WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table) : mutex_address(mutex_address) { mutex_value = Core::System::GetInstance().Memory().Read32(mutex_address); - owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); + owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Svc::HandleWaitMask); owner = handle_table.Get<Kernel::Thread>(owner_handle); } @@ -127,7 +127,7 @@ QString WaitTreeMutexInfo::GetText() const { } std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeMutexInfo::GetChildren() const { - const bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0; + const bool has_waiters = (mutex_value & Kernel::Svc::HandleWaitMask) != 0; std::vector<std::unique_ptr<WaitTreeItem>> list; list.push_back(std::make_unique<WaitTreeText>(tr("has waiters: %1").arg(has_waiters))); @@ -169,7 +169,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons return list; } -WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& o) +WaitTreeSynchronizationObject::WaitTreeSynchronizationObject( + const Kernel::KSynchronizationObject& o) : object(o) {} WaitTreeSynchronizationObject::~WaitTreeSynchronizationObject() = default; @@ -188,7 +189,7 @@ QString WaitTreeSynchronizationObject::GetText() const { } std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::make( - const Kernel::SynchronizationObject& object) { + const Kernel::KSynchronizationObject& object) { switch (object.GetHandleType()) { case Kernel::HandleType::ReadableEvent: return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); @@ -202,7 +203,7 @@ std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::ma std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChildren() const { std::vector<std::unique_ptr<WaitTreeItem>> list; - const auto& threads = object.GetWaitingThreads(); + const auto& threads = object.GetWaitingThreadsForDebugging(); if (threads.empty()) { list.push_back(std::make_unique<WaitTreeText>(tr("waited by no thread"))); } else { @@ -211,8 +212,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChi return list; } -WaitTreeObjectList::WaitTreeObjectList( - const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, bool w_all) +WaitTreeObjectList::WaitTreeObjectList(const std::vector<Kernel::KSynchronizationObject*>& list, + bool w_all) : object_list(list), wait_all(w_all) {} WaitTreeObjectList::~WaitTreeObjectList() = default; @@ -237,8 +238,8 @@ WaitTreeThread::~WaitTreeThread() = default; QString WaitTreeThread::GetText() const { const auto& thread = static_cast<const Kernel::Thread&>(object); QString status; - switch (thread.GetStatus()) { - case Kernel::ThreadStatus::Ready: + switch (thread.GetState()) { + case Kernel::ThreadState::Runnable: if (!thread.IsPaused()) { if (thread.WasRunning()) { status = tr("running"); @@ -249,35 +250,39 @@ QString WaitTreeThread::GetText() const { status = tr("paused"); } break; - case Kernel::ThreadStatus::Paused: - status = tr("paused"); - break; - case Kernel::ThreadStatus::WaitHLEEvent: - status = tr("waiting for HLE return"); - break; - case Kernel::ThreadStatus::WaitSleep: - status = tr("sleeping"); - break; - case Kernel::ThreadStatus::WaitIPC: - status = tr("waiting for IPC reply"); - break; - case Kernel::ThreadStatus::WaitSynch: - status = tr("waiting for objects"); - break; - case Kernel::ThreadStatus::WaitMutex: - status = tr("waiting for mutex"); - break; - case Kernel::ThreadStatus::WaitCondVar: - status = tr("waiting for condition variable"); + case Kernel::ThreadState::Waiting: + switch (thread.GetWaitReasonForDebugging()) { + case Kernel::ThreadWaitReasonForDebugging::Sleep: + status = tr("sleeping"); + break; + case Kernel::ThreadWaitReasonForDebugging::IPC: + status = tr("waiting for IPC reply"); + break; + case Kernel::ThreadWaitReasonForDebugging::Synchronization: + status = tr("waiting for objects"); + break; + case Kernel::ThreadWaitReasonForDebugging::ConditionVar: + status = tr("waiting for condition variable"); + break; + case Kernel::ThreadWaitReasonForDebugging::Arbitration: + status = tr("waiting for address arbiter"); + break; + case Kernel::ThreadWaitReasonForDebugging::Suspended: + status = tr("waiting for suspend resume"); + break; + default: + status = tr("waiting"); + break; + } break; - case Kernel::ThreadStatus::WaitArb: - status = tr("waiting for address arbiter"); + case Kernel::ThreadState::Initialized: + status = tr("initialized"); break; - case Kernel::ThreadStatus::Dormant: - status = tr("dormant"); + case Kernel::ThreadState::Terminated: + status = tr("terminated"); break; - case Kernel::ThreadStatus::Dead: - status = tr("dead"); + default: + status = tr("unknown"); break; } @@ -293,8 +298,8 @@ QColor WaitTreeThread::GetColor() const { const std::size_t color_index = IsDarkTheme() ? 1 : 0; const auto& thread = static_cast<const Kernel::Thread&>(object); - switch (thread.GetStatus()) { - case Kernel::ThreadStatus::Ready: + switch (thread.GetState()) { + case Kernel::ThreadState::Runnable: if (!thread.IsPaused()) { if (thread.WasRunning()) { return QColor(WaitTreeColors[0][color_index]); @@ -304,21 +309,24 @@ QColor WaitTreeThread::GetColor() const { } else { return QColor(WaitTreeColors[2][color_index]); } - case Kernel::ThreadStatus::Paused: - return QColor(WaitTreeColors[3][color_index]); - case Kernel::ThreadStatus::WaitHLEEvent: - case Kernel::ThreadStatus::WaitIPC: - return QColor(WaitTreeColors[4][color_index]); - case Kernel::ThreadStatus::WaitSleep: - return QColor(WaitTreeColors[5][color_index]); - case Kernel::ThreadStatus::WaitSynch: - case Kernel::ThreadStatus::WaitMutex: - case Kernel::ThreadStatus::WaitCondVar: - case Kernel::ThreadStatus::WaitArb: - return QColor(WaitTreeColors[6][color_index]); - case Kernel::ThreadStatus::Dormant: + case Kernel::ThreadState::Waiting: + switch (thread.GetWaitReasonForDebugging()) { + case Kernel::ThreadWaitReasonForDebugging::IPC: + return QColor(WaitTreeColors[4][color_index]); + case Kernel::ThreadWaitReasonForDebugging::Sleep: + return QColor(WaitTreeColors[5][color_index]); + case Kernel::ThreadWaitReasonForDebugging::Synchronization: + case Kernel::ThreadWaitReasonForDebugging::ConditionVar: + case Kernel::ThreadWaitReasonForDebugging::Arbitration: + case Kernel::ThreadWaitReasonForDebugging::Suspended: + return QColor(WaitTreeColors[6][color_index]); + break; + default: + return QColor(WaitTreeColors[3][color_index]); + } + case Kernel::ThreadState::Initialized: return QColor(WaitTreeColors[7][color_index]); - case Kernel::ThreadStatus::Dead: + case Kernel::ThreadState::Terminated: return QColor(WaitTreeColors[8][color_index]); default: return WaitTreeItem::GetColor(); @@ -354,11 +362,11 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadID()))); list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)") .arg(thread.GetPriority()) - .arg(thread.GetNominalPriority()))); + .arg(thread.GetBasePriority()))); list.push_back(std::make_unique<WaitTreeText>( tr("last running ticks = %1").arg(thread.GetLastScheduledTick()))); - const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); + const VAddr mutex_wait_address = thread.GetMutexWaitAddressForDebugging(); if (mutex_wait_address != 0) { const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable(); list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table)); @@ -366,9 +374,11 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); } - if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) { - list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(), - thread.IsWaitingSync())); + if (thread.GetState() == Kernel::ThreadState::Waiting && + thread.GetWaitReasonForDebugging() == + Kernel::ThreadWaitReasonForDebugging::Synchronization) { + list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjectsForDebugging(), + thread.IsCancellable())); } list.push_back(std::make_unique<WaitTreeCallstack>(thread)); @@ -380,7 +390,7 @@ WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object) : WaitTreeSynchronizationObject(object) {} WaitTreeEvent::~WaitTreeEvent() = default; -WaitTreeThreadList::WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list) +WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::Thread*>& list) : thread_list(list) {} WaitTreeThreadList::~WaitTreeThreadList() = default; diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index 8e3bc4b24..cf96911ea 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h @@ -18,8 +18,8 @@ class EmuThread; namespace Kernel { class HandleTable; +class KSynchronizationObject; class ReadableEvent; -class SynchronizationObject; class Thread; } // namespace Kernel @@ -102,30 +102,29 @@ private: class WaitTreeSynchronizationObject : public WaitTreeExpandableItem { Q_OBJECT public: - explicit WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& object); + explicit WaitTreeSynchronizationObject(const Kernel::KSynchronizationObject& object); ~WaitTreeSynchronizationObject() override; static std::unique_ptr<WaitTreeSynchronizationObject> make( - const Kernel::SynchronizationObject& object); + const Kernel::KSynchronizationObject& object); QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; protected: - const Kernel::SynchronizationObject& object; + const Kernel::KSynchronizationObject& object; }; class WaitTreeObjectList : public WaitTreeExpandableItem { Q_OBJECT public: - WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, - bool wait_all); + WaitTreeObjectList(const std::vector<Kernel::KSynchronizationObject*>& list, bool wait_all); ~WaitTreeObjectList() override; QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; private: - const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& object_list; + const std::vector<Kernel::KSynchronizationObject*>& object_list; bool wait_all; }; @@ -150,14 +149,14 @@ public: class WaitTreeThreadList : public WaitTreeExpandableItem { Q_OBJECT public: - explicit WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list); + explicit WaitTreeThreadList(const std::vector<Kernel::Thread*>& list); ~WaitTreeThreadList() override; QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; private: - const std::vector<std::shared_ptr<Kernel::Thread>>& thread_list; + const std::vector<Kernel::Thread*>& thread_list; }; class WaitTreeModel : public QAbstractItemModel { diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 43d64b708..2e74037d1 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -292,12 +292,48 @@ GMainWindow::GMainWindow() connect(&mouse_hide_timer, &QTimer::timeout, this, &GMainWindow::HideMouseCursor); connect(ui.menubar, &QMenuBar::hovered, this, &GMainWindow::ShowMouseCursor); + MigrateConfigFiles(); + + ui.action_Fullscreen->setChecked(false); + QStringList args = QApplication::arguments(); - if (args.length() >= 2) { - BootGame(args[1]); + + if (args.size() < 2) { + return; } - MigrateConfigFiles(); + QString game_path; + + for (int i = 1; i < args.size(); ++i) { + // Preserves drag/drop functionality + if (args.size() == 2 && !args[1].startsWith(QChar::fromLatin1('-'))) { + game_path = args[1]; + break; + } + + // Launch game in fullscreen mode + if (args[i] == QStringLiteral("-f")) { + ui.action_Fullscreen->setChecked(true); + continue; + } + + // Launch game at path + if (args[i] == QStringLiteral("-g")) { + if (i >= args.size() - 1) { + continue; + } + + if (args[i + 1].startsWith(QChar::fromLatin1('-'))) { + continue; + } + + game_path = args[++i]; + } + } + + if (!game_path.isEmpty()) { + BootGame(game_path); + } } GMainWindow::~GMainWindow() { @@ -1058,8 +1094,9 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) { tr("%1<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the " "yuzu quickstart guide</a> to redump your files.<br>You can refer " "to the yuzu wiki</a> or the yuzu Discord</a> for help.", - "%1 signifies a numeric error ID.") - .arg(error_id); + "%1 signifies an error string.") + .arg(QString::fromStdString( + GetResultStatusString(static_cast<Loader::ResultStatus>(error_id)))); QMessageBox::critical(this, title, description); } else { @@ -1133,6 +1170,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) { [this](std::size_t program_index) { render_window->ExecuteProgram(program_index); }); connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); + connect(render_window, &GRenderWindow::MouseActivity, this, &GMainWindow::OnMouseActivity); // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views // before the CPU continues connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget, @@ -1156,8 +1194,8 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) { if (UISettings::values.hide_mouse) { mouse_hide_timer.start(); - setMouseTracking(true); - ui.centralwidget->setMouseTracking(true); + render_window->installEventFilter(render_window); + render_window->setAttribute(Qt::WA_Hover, true); } std::string title_name; @@ -1234,8 +1272,8 @@ void GMainWindow::ShutdownGame() { } game_list->SetFilterFocus(); - setMouseTracking(false); - ui.centralwidget->setMouseTracking(false); + render_window->removeEventFilter(render_window); + render_window->setAttribute(Qt::WA_Hover, false); UpdateWindowTitle(); @@ -2316,12 +2354,12 @@ void GMainWindow::OnConfigure() { config->Save(); if (UISettings::values.hide_mouse && emulation_running) { - setMouseTracking(true); - ui.centralwidget->setMouseTracking(true); + render_window->installEventFilter(render_window); + render_window->setAttribute(Qt::WA_Hover, true); mouse_hide_timer.start(); } else { - setMouseTracking(false); - ui.centralwidget->setMouseTracking(false); + render_window->removeEventFilter(render_window); + render_window->setAttribute(Qt::WA_Hover, false); } UpdateStatusButtons(); @@ -2561,21 +2599,17 @@ void GMainWindow::HideMouseCursor() { ShowMouseCursor(); return; } - setCursor(QCursor(Qt::BlankCursor)); + render_window->setCursor(QCursor(Qt::BlankCursor)); } void GMainWindow::ShowMouseCursor() { - unsetCursor(); + render_window->unsetCursor(); if (emu_thread != nullptr && UISettings::values.hide_mouse) { mouse_hide_timer.start(); } } -void GMainWindow::mouseMoveEvent(QMouseEvent* event) { - ShowMouseCursor(); -} - -void GMainWindow::mousePressEvent(QMouseEvent* event) { +void GMainWindow::OnMouseActivity() { ShowMouseCursor(); } diff --git a/src/yuzu/main.h b/src/yuzu/main.h index ea6d2c30d..31788ea62 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -248,6 +248,7 @@ private slots: void OnCoreError(Core::System::ResultStatus, std::string); void OnReinitializeKeys(ReinitializeKeyBehavior behavior); void OnLanguageChanged(const QString& locale); + void OnMouseActivity(); private: void RemoveBaseContent(u64 program_id, const QString& entry_type); @@ -335,6 +336,4 @@ protected: void dropEvent(QDropEvent* event) override; void dragEnterEvent(QDragEnterEvent* event) override; void dragMoveEvent(QDragMoveEvent* event) override; - void mouseMoveEvent(QMouseEvent* event) override; - void mousePressEvent(QMouseEvent* event) override; }; diff --git a/src/yuzu/util/url_request_interceptor.cpp b/src/yuzu/util/url_request_interceptor.cpp index 2d491d8c0..b637e771e 100644 --- a/src/yuzu/util/url_request_interceptor.cpp +++ b/src/yuzu/util/url_request_interceptor.cpp @@ -22,6 +22,8 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo& info) { case QWebEngineUrlRequestInfo::ResourceTypeXhr: emit FrameChanged(); break; + default: + break; } } diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 38075c345..41ef6f6b8 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -344,7 +344,7 @@ void Config::ReadValues() { // System Settings::values.use_docked_mode.SetValue( - sdl2_config->GetBoolean("System", "use_docked_mode", false)); + sdl2_config->GetBoolean("System", "use_docked_mode", true)); Settings::values.current_user = std::clamp<int>( sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 2d4b98d9a..3ee0e037d 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -274,7 +274,7 @@ gamecard_path = [System] # Whether the system is docked -# 1: Yes, 0 (default): No +# 1 (default): Yes, 0: No use_docked_mode = # Allow the use of NFC in games diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 39e0d35aa..4faf62ede 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -95,8 +95,6 @@ int main(int argc, char** argv) { int option_index = 0; InitializeLogging(); - - char* endarg; #ifdef _WIN32 int argc_w; auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 91684e96e..0aa143e1f 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp @@ -86,7 +86,7 @@ void Config::ReadValues() { Settings::values.touchscreen.diameter_y = 15; Settings::values.use_docked_mode.SetValue( - sdl2_config->GetBoolean("Controls", "use_docked_mode", false)); + sdl2_config->GetBoolean("Controls", "use_docked_mode", true)); // Data Storage Settings::values.use_virtual_sd = diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 3eb64e9d7..779c3791b 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h @@ -116,7 +116,7 @@ use_virtual_sd = [System] # Whether the system is docked -# 1: Yes, 0 (default): No +# 1 (default): Yes, 0: No use_docked_mode = # Allow the use of NFC in games |