From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: Reduce amount of size calculations. --- src/common/common_funcs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/common') diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6..00a5698f3 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,6 +4,7 @@ #pragma once +#include #include #if !defined(ARCHITECTURE_x86_64) @@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + } // namespace Common -- cgit v1.2.3 From 345e73f2feb0701e3c3099d002a1c21fb524eae4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 04:17:48 -0300 Subject: video_core: Use un-shifted block sizes to avoid integer divisions Instead of storing all block width, height and depths in their shifted form: block_width = 1U << block_shift; Store them like they are provided by the emulated hardware (their block_shift form). This way we can avoid doing the costly Common::AlignUp operation to align texture sizes and drop CPU integer divisions with bitwise logic (defined in Common::AlignBits). --- src/common/alignment.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/common') diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..3379a6967 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -19,6 +19,11 @@ constexpr T AlignDown(T value, std::size_t size) { return static_cast(value - value % size); } +template +constexpr T AlignBits(T value, T align) { + return (value + ((1 << align) - 1)) >> align << align; +} + template constexpr bool Is4KBAligned(T value) { static_assert(std::is_unsigned_v, "T must be an unsigned value."); -- cgit v1.2.3 From 94f2be5473182789ec3f6388b43fcd708a505500 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:12:35 -0400 Subject: texture_cache: Optimize GetMipBlockHeight and GetMipBlockDepth --- src/common/bit_util.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'src/common') diff --git a/src/common/bit_util.h b/src/common/bit_util.h index d032df413..6f7d5a947 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) { } #endif +#ifdef _MSC_VER + +inline u32 MostSignificantBit32(const u32 value) { + unsigned long result; + _BitScanReverse(&result, value); + return static_cast(result); +} + +inline u32 MostSignificantBit64(const u64 value) { + unsigned long result; + _BitScanReverse64(&result, value); + return static_cast(result); +} + +#else + +inline u32 MostSignificantBit32(const u32 value) { + return 31U - static_cast(__builtin_clz(value)); +} + +inline u32 MostSignificantBit64(const u64 value) { + return 63U - static_cast(__builtin_clzll(value)); +} + +#endif + +inline u32 Log2Floor32(const u32 value) { + return MostSignificantBit32(value); +} + +inline u32 Log2Ceil32(const u32 value) { + const u32 log2_f = Log2Floor32(value); + return log2_f + ((value ^ (1U << log2_f)) != 0U); +} + +inline u32 Log2Floor64(const u64 value) { + return MostSignificantBit64(value); +} + +inline u32 Log2Ceil64(const u64 value) { + const u64 log2_f = static_cast(Log2Floor64(value)); + return static_cast(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL)); +} + } // namespace Common -- cgit v1.2.3 From 06c4ce86458310870abec90ada68ac393256b9b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 02:07:18 -0300 Subject: shader: Decode SUST and implement backing image functionality --- src/common/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 198b3fe07..8ae05137b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" -- cgit v1.2.3 From de982deb25f685dd8fa67680fb8dd6c627f70859 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 01:47:09 -0300 Subject: common/alignment: Address feedback --- src/common/alignment.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/common') diff --git a/src/common/alignment.h b/src/common/alignment.h index 3379a6967..617b14d9b 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -20,8 +20,9 @@ constexpr T AlignDown(T value, std::size_t size) { } template -constexpr T AlignBits(T value, T align) { - return (value + ((1 << align) - 1)) >> align << align; +constexpr T AlignBits(T value, std::size_t align) { + static_assert(std::is_unsigned_v, "T must be an unsigned value."); + return static_cast((value + ((1ULL << align) - 1)) >> align << align); } template -- cgit v1.2.3 From 3b9d89839dc62e9e63a3cbe9636cf85276babdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 21:10:59 -0400 Subject: texture_cache: Address Feedback --- src/common/CMakeLists.txt | 1 + src/common/binary_find.h | 21 +++++++++++++++++++++ src/common/common_funcs.h | 10 ---------- 3 files changed, 22 insertions(+), 10 deletions(-) create mode 100644 src/common/binary_find.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 8ae05137b..2554add28 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -75,6 +75,7 @@ add_library(common STATIC assert.h detached_tasks.cpp detached_tasks.h + binary_find.h bit_field.h bit_util.h cityhash.cpp diff --git a/src/common/binary_find.h b/src/common/binary_find.h new file mode 100644 index 000000000..5cc523bf9 --- /dev/null +++ b/src/common/binary_find.h @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Common { + +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +} // namespace Common diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 00a5698f3..04ecac959 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -61,14 +61,4 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } -template > -ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? first : last; -} - } // namespace Common -- cgit v1.2.3